diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-01 03:29:33 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-01 03:29:33 +0300 |
commit | 29d9f30d4ce6c7a38745a54a8cddface10013490 (patch) | |
tree | 85649ba6a7b39203584d8db9365e03f64e62c136 /drivers/net/ethernet/mellanox | |
parent | 56a451b780676bc1cdac011735fe2869fa2e9abf (diff) | |
parent | 7f80ccfe996871ca69648efee74a60ae7ad0dcd9 (diff) | |
download | linux-29d9f30d4ce6c7a38745a54a8cddface10013490.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller:
"Highlights:
1) Fix the iwlwifi regression, from Johannes Berg.
2) Support BSS coloring and 802.11 encapsulation offloading in
hardware, from John Crispin.
3) Fix some potential Spectre issues in qtnfmac, from Sergey
Matyukevich.
4) Add TTL decrement action to openvswitch, from Matteo Croce.
5) Allow paralleization through flow_action setup by not taking the
RTNL mutex, from Vlad Buslov.
6) A lot of zero-length array to flexible-array conversions, from
Gustavo A. R. Silva.
7) Align XDP statistics names across several drivers for consistency,
from Lorenzo Bianconi.
8) Add various pieces of infrastructure for offloading conntrack, and
make use of it in mlx5 driver, from Paul Blakey.
9) Allow using listening sockets in BPF sockmap, from Jakub Sitnicki.
10) Lots of parallelization improvements during configuration changes
in mlxsw driver, from Ido Schimmel.
11) Add support to devlink for generic packet traps, which report
packets dropped during ACL processing. And use them in mlxsw
driver. From Jiri Pirko.
12) Support bcmgenet on ACPI, from Jeremy Linton.
13) Make BPF compatible with RT, from Thomas Gleixnet, Alexei
Starovoitov, and your's truly.
14) Support XDP meta-data in virtio_net, from Yuya Kusakabe.
15) Fix sysfs permissions when network devices change namespaces, from
Christian Brauner.
16) Add a flags element to ethtool_ops so that drivers can more simply
indicate which coalescing parameters they actually support, and
therefore the generic layer can validate the user's ethtool
request. Use this in all drivers, from Jakub Kicinski.
17) Offload FIFO qdisc in mlxsw, from Petr Machata.
18) Support UDP sockets in sockmap, from Lorenz Bauer.
19) Fix stretch ACK bugs in several TCP congestion control modules,
from Pengcheng Yang.
20) Support virtual functiosn in octeontx2 driver, from Tomasz
Duszynski.
21) Add region operations for devlink and use it in ice driver to dump
NVM contents, from Jacob Keller.
22) Add support for hw offload of MACSEC, from Antoine Tenart.
23) Add support for BPF programs that can be attached to LSM hooks,
from KP Singh.
24) Support for multiple paths, path managers, and counters in MPTCP.
From Peter Krystad, Paolo Abeni, Florian Westphal, Davide Caratti,
and others.
25) More progress on adding the netlink interface to ethtool, from
Michal Kubecek"
* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2121 commits)
net: ipv6: rpl_iptunnel: Fix potential memory leak in rpl_do_srh_inline
cxgb4/chcr: nic-tls stats in ethtool
net: dsa: fix oops while probing Marvell DSA switches
net/bpfilter: remove superfluous testing message
net: macb: Fix handling of fixed-link node
net: dsa: ksz: Select KSZ protocol tag
netdevsim: dev: Fix memory leak in nsim_dev_take_snapshot_write
net: stmmac: add EHL 2.5Gbps PCI info and PCI ID
net: stmmac: add EHL PSE0 & PSE1 1Gbps PCI info and PCI ID
net: stmmac: create dwmac-intel.c to contain all Intel platform
net: dsa: bcm_sf2: Support specifying VLAN tag egress rule
net: dsa: bcm_sf2: Add support for matching VLAN TCI
net: dsa: bcm_sf2: Move writing of CFP_DATA(5) into slicing functions
net: dsa: bcm_sf2: Check earlier for FLOW_EXT and FLOW_MAC_EXT
net: dsa: bcm_sf2: Disable learning for ASP port
net: dsa: b53: Deny enslaving port 7 for 7278 into a bridge
net: dsa: b53: Prevent tagged VLAN on port 7 for 7278
net: dsa: b53: Restore VLAN entries upon (re)configuration
net: dsa: bcm_sf2: Fix overflow checks
hv_netvsc: Remove unnecessary round_up for recv_completion_cnt
...
Diffstat (limited to 'drivers/net/ethernet/mellanox')
114 files changed, 9208 insertions, 2847 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c index 64ed725aec28..73eae80e1cb7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/crdump.c +++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c @@ -38,8 +38,21 @@ #define CR_ENABLE_BIT_OFFSET 0xF3F04 #define MAX_NUM_OF_DUMPS_TO_STORE (8) -static const char *region_cr_space_str = "cr-space"; -static const char *region_fw_health_str = "fw-health"; +#define REGION_CR_SPACE "cr-space" +#define REGION_FW_HEALTH "fw-health" + +static const char * const region_cr_space_str = REGION_CR_SPACE; +static const char * const region_fw_health_str = REGION_FW_HEALTH; + +static const struct devlink_region_ops region_cr_space_ops = { + .name = REGION_CR_SPACE, + .destructor = &kvfree, +}; + +static const struct devlink_region_ops region_fw_health_ops = { + .name = REGION_FW_HEALTH, + .destructor = &kvfree, +}; /* Set to true in case cr enable bit was set to true before crdump */ static bool crdump_enbale_bit_set; @@ -99,7 +112,7 @@ static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev, readl(cr_space + offset); err = devlink_region_snapshot_create(crdump->region_crspace, - crspace_data, id, &kvfree); + crspace_data, id); if (err) { kvfree(crspace_data); mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n", @@ -138,7 +151,7 @@ static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev, readl(health_buf_start + offset); err = devlink_region_snapshot_create(crdump->region_fw_health, - health_data, id, &kvfree); + health_data, id); if (err) { kvfree(health_data); mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n", @@ -159,6 +172,7 @@ int mlx4_crdump_collect(struct mlx4_dev *dev) struct pci_dev *pdev = dev->persist->pdev; unsigned long cr_res_size; u8 __iomem *cr_space; + int err; u32 id; if (!dev->caps.health_buffer_addrs) { @@ -179,15 +193,22 @@ int mlx4_crdump_collect(struct mlx4_dev *dev) return -ENODEV; } - crdump_enable_crspace_access(dev, cr_space); - /* Get the available snapshot ID for the dumps */ - id = devlink_region_snapshot_id_get(devlink); + err = devlink_region_snapshot_id_get(devlink, &id); + if (err) { + mlx4_err(dev, "crdump: devlink get snapshot id err %d\n", err); + return err; + } + + crdump_enable_crspace_access(dev, cr_space); /* Try to capture dumps */ mlx4_crdump_collect_crspace(dev, cr_space, id); mlx4_crdump_collect_fw_health(dev, cr_space, id); + /* Release reference on the snapshot id */ + devlink_region_snapshot_id_put(devlink, id); + crdump_disable_crspace_access(dev, cr_space); iounmap(cr_space); @@ -205,7 +226,7 @@ int mlx4_crdump_init(struct mlx4_dev *dev) /* Create cr-space region */ crdump->region_crspace = devlink_region_create(devlink, - region_cr_space_str, + ®ion_cr_space_ops, MAX_NUM_OF_DUMPS_TO_STORE, pci_resource_len(pdev, 0)); if (IS_ERR(crdump->region_crspace)) @@ -216,7 +237,7 @@ int mlx4_crdump_init(struct mlx4_dev *dev) /* Create fw-health region */ crdump->region_fw_health = devlink_region_create(devlink, - region_fw_health_str, + ®ion_fw_health_ops, MAX_NUM_OF_DUMPS_TO_STORE, HEALTH_BUFFER_SIZE); if (IS_ERR(crdump->region_fw_health)) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 8bf1f08fdee2..8a5ea2543670 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -2121,6 +2121,10 @@ static int mlx4_en_set_phys_id(struct net_device *dev, } const struct ethtool_ops mlx4_en_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_PKT_RATE_RX_USECS, .get_drvinfo = mlx4_en_get_drvinfo, .get_link_ksettings = mlx4_en_get_link_ksettings, .set_link_ksettings = mlx4_en_set_link_ksettings, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index a1f20b205299..312e0a1ad43d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -78,6 +78,16 @@ config MLX5_ESWITCH Legacy SRIOV mode (L2 mac vlan steering based). Switchdev mode (eswitch offloads). +config MLX5_TC_CT + bool "MLX5 TC connection tracking offload support" + depends on MLX5_CORE_EN && NET_SWITCHDEV && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT + default y + help + Say Y here if you want to support offloading connection tracking rules + via tc ct action. + + If unsure, set to Y + config MLX5_CORE_EN_DCB bool "Data Center Bridging (DCB) Support" default y diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d3e06cec8317..6d32915000fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o # # Netdev basic @@ -25,7 +25,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \ - en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o + en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o # # Netdev extra @@ -34,15 +34,16 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ - lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ + lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ en/tc_tun_geneve.o diag/en_tc_tracepoint.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o +mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o # # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o eswitch_offloads_chains.o + ecpf.o rdma.o esw/chains.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 50862275544e..1972ddd12704 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -193,7 +193,7 @@ bool mlx5_device_registered(struct mlx5_core_dev *dev) return found; } -int mlx5_register_device(struct mlx5_core_dev *dev) +void mlx5_register_device(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_interface *intf; @@ -203,8 +203,6 @@ int mlx5_register_device(struct mlx5_core_dev *dev) list_for_each_entry(intf, &intf_list, list) mlx5_add_device(intf, priv); mutex_unlock(&mlx5_intf_mutex); - - return 0; } void mlx5_unregister_device(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index ac108f1e5bd6..bdeb291f6b67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -90,7 +90,8 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, { struct mlx5_core_dev *dev = devlink_priv(devlink); - return mlx5_unload_one(dev, false); + mlx5_unload_one(dev, false); + return 0; } static int mlx5_devlink_reload_up(struct devlink *devlink, @@ -190,11 +191,6 @@ static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id, return 0; } -enum mlx5_devlink_param_id { - MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, - MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, -}; - static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack) @@ -210,14 +206,38 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, return 0; } +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + int group_num = val.vu32; + + if (group_num < 1 || group_num > 1024) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported group number, supported range is 1-1024"); + return -EOPNOTSUPP; + } + + return 0; +} +#endif + static const struct devlink_param mlx5_devlink_params[] = { - DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set, mlx5_devlink_fs_mode_validate), DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_enable_roce_validate), +#ifdef CONFIG_MLX5_ESWITCH + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + "fdb_large_groups", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, + mlx5_devlink_large_group_num_validate), +#endif }; static void mlx5_devlink_set_params_init_values(struct devlink *devlink) @@ -230,13 +250,20 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) else strcpy(value.vstr, "smfs"); devlink_param_driverinit_value_set(devlink, - MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, + MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, value); value.vbool = MLX5_CAP_GEN(dev, roce); devlink_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, value); + +#ifdef CONFIG_MLX5_ESWITCH + value.vu32 = ESW_OFFLOADS_DEFAULT_NUM_GROUPS; + devlink_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + value); +#endif } int mlx5_devlink_register(struct devlink *devlink, struct device *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index d0ba03774ddf..f0de327a59be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -6,6 +6,12 @@ #include <net/devlink.h> +enum mlx5_devlink_param_id { + MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, +}; + struct devlink *mlx5_devlink_alloc(void); void mlx5_devlink_free(struct devlink *devlink); int mlx5_devlink_register(struct devlink *devlink, struct device *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 94d7b69a95c7..c9c9b479bda5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -188,7 +188,7 @@ static int mlx5_fw_tracer_create_mkey(struct mlx5_fw_tracer *tracer) MLX5_SET(create_mkey_in, in, translations_octword_actual_size, DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2)); - mtt = (u64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++) mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c new file mode 100644 index 000000000000..17ab7efe693d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "rsc_dump.h" +#include "lib/mlx5.h" + +#define MLX5_SGMT_TYPE(SGMT) MLX5_SGMT_TYPE_##SGMT +#define MLX5_SGMT_STR_ASSING(SGMT)[MLX5_SGMT_TYPE(SGMT)] = #SGMT +static const char *const mlx5_rsc_sgmt_name[] = { + MLX5_SGMT_STR_ASSING(HW_CQPC), + MLX5_SGMT_STR_ASSING(HW_SQPC), + MLX5_SGMT_STR_ASSING(HW_RQPC), + MLX5_SGMT_STR_ASSING(FULL_SRQC), + MLX5_SGMT_STR_ASSING(FULL_CQC), + MLX5_SGMT_STR_ASSING(FULL_EQC), + MLX5_SGMT_STR_ASSING(FULL_QPC), + MLX5_SGMT_STR_ASSING(SND_BUFF), + MLX5_SGMT_STR_ASSING(RCV_BUFF), + MLX5_SGMT_STR_ASSING(SRQ_BUFF), + MLX5_SGMT_STR_ASSING(CQ_BUFF), + MLX5_SGMT_STR_ASSING(EQ_BUFF), + MLX5_SGMT_STR_ASSING(SX_SLICE), + MLX5_SGMT_STR_ASSING(SX_SLICE_ALL), + MLX5_SGMT_STR_ASSING(RDB), + MLX5_SGMT_STR_ASSING(RX_SLICE_ALL), +}; + +struct mlx5_rsc_dump { + u32 pdn; + struct mlx5_core_mkey mkey; + u16 fw_segment_type[MLX5_SGMT_TYPE_NUM]; +}; + +struct mlx5_rsc_dump_cmd { + u64 mem_size; + u8 cmd[MLX5_ST_SZ_BYTES(resource_dump)]; +}; + +static int mlx5_rsc_dump_sgmt_get_by_name(char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlx5_rsc_sgmt_name); i++) + if (!strcmp(name, mlx5_rsc_sgmt_name[i])) + return i; + + return -EINVAL; +} + +static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page) +{ + void *data = page_address(page); + enum mlx5_sgmt_type sgmt_idx; + int num_of_items; + char *sgmt_name; + void *member; + void *menu; + int i; + + menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); + num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records); + + for (i = 0; i < num_of_items; i++) { + member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]); + sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name); + sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name); + if (sgmt_idx == -EINVAL) + continue; + rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record, + member, segment_type); + } +} + +static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page) +{ + struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump; + struct device *ddev = &dev->pdev->dev; + u32 out_seq_num; + u32 in_seq_num; + dma_addr_t dma; + int err; + + dma = dma_map_page(ddev, page, 0, cmd->mem_size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(ddev, dma))) + return -ENOMEM; + + in_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num); + MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey.key); + MLX5_SET64(resource_dump, cmd->cmd, address, dma); + + err = mlx5_core_access_reg(dev, cmd->cmd, sizeof(cmd->cmd), cmd->cmd, + sizeof(cmd->cmd), MLX5_REG_RESOURCE_DUMP, 0, 1); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to access err %d\n", err); + goto out; + } + out_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num); + if (out_seq_num && (in_seq_num + 1 != out_seq_num)) + err = -EIO; +out: + dma_unmap_page(ddev, dma, cmd->mem_size, DMA_FROM_DEVICE); + return err; +} + +struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev, + struct mlx5_rsc_key *key) +{ + struct mlx5_rsc_dump_cmd *cmd; + int sgmt_type; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return ERR_PTR(-EOPNOTSUPP); + + sgmt_type = dev->rsc_dump->fw_segment_type[key->rsc]; + if (!sgmt_type && key->rsc != MLX5_SGMT_TYPE_MENU) + return ERR_PTR(-EOPNOTSUPP); + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) { + mlx5_core_err(dev, "Resource dump: Failed to allocate command\n"); + return ERR_PTR(-ENOMEM); + } + MLX5_SET(resource_dump, cmd->cmd, segment_type, sgmt_type); + MLX5_SET(resource_dump, cmd->cmd, index1, key->index1); + MLX5_SET(resource_dump, cmd->cmd, index2, key->index2); + MLX5_SET(resource_dump, cmd->cmd, num_of_obj1, key->num_of_obj1); + MLX5_SET(resource_dump, cmd->cmd, num_of_obj2, key->num_of_obj2); + MLX5_SET(resource_dump, cmd->cmd, size, key->size); + cmd->mem_size = key->size; + return cmd; +} + +void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd) +{ + kfree(cmd); +} + +int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page, int *size) +{ + bool more_dump; + int err; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return -EOPNOTSUPP; + + err = mlx5_rsc_dump_trigger(dev, cmd, page); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to trigger dump, %d\n", err); + return err; + } + *size = MLX5_GET(resource_dump, cmd->cmd, size); + more_dump = MLX5_GET(resource_dump, cmd->cmd, more_dump); + + return more_dump; +} + +#define MLX5_RSC_DUMP_MENU_SEGMENT 0xffff +static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump_cmd *cmd = NULL; + struct mlx5_rsc_key key = {}; + struct page *page; + int size; + int err; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + key.rsc = MLX5_SGMT_TYPE_MENU; + key.size = PAGE_SIZE; + cmd = mlx5_rsc_dump_cmd_create(dev, &key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + MLX5_SET(resource_dump, cmd->cmd, segment_type, MLX5_RSC_DUMP_MENU_SEGMENT); + + do { + err = mlx5_rsc_dump_next(dev, cmd, page, &size); + if (err < 0) + goto destroy_cmd; + + mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page); + + } while (err > 0); + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); +free_page: + __free_page(page); + + return err; +} + +static int mlx5_rsc_dump_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_core_mkey *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump *rsc_dump; + + if (!MLX5_CAP_DEBUG(dev, resource_dump)) { + mlx5_core_dbg(dev, "Resource dump: capability not present\n"); + return NULL; + } + rsc_dump = kzalloc(sizeof(*rsc_dump), GFP_KERNEL); + if (!rsc_dump) + return ERR_PTR(-ENOMEM); + + return rsc_dump; +} + +void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev) +{ + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return; + kfree(dev->rsc_dump); +} + +int mlx5_rsc_dump_init(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump; + int err; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return 0; + + err = mlx5_core_alloc_pd(dev, &rsc_dump->pdn); + if (err) { + mlx5_core_warn(dev, "Resource dump: Failed to allocate PD %d\n", err); + return err; + } + err = mlx5_rsc_dump_create_mkey(dev, rsc_dump->pdn, &rsc_dump->mkey); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to create mkey, %d\n", err); + goto free_pd; + } + err = mlx5_rsc_dump_menu(dev); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to read menu, %d\n", err); + goto destroy_mkey; + } + return err; + +destroy_mkey: + mlx5_core_destroy_mkey(dev, &rsc_dump->mkey); +free_pd: + mlx5_core_dealloc_pd(dev, rsc_dump->pdn); + return err; +} + +void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev) +{ + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return; + + mlx5_core_destroy_mkey(dev, &dev->rsc_dump->mkey); + mlx5_core_dealloc_pd(dev, dev->rsc_dump->pdn); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h new file mode 100644 index 000000000000..148270073e71 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_RSC_DUMP_H +#define __MLX5_RSC_DUMP_H + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +enum mlx5_sgmt_type { + MLX5_SGMT_TYPE_HW_CQPC, + MLX5_SGMT_TYPE_HW_SQPC, + MLX5_SGMT_TYPE_HW_RQPC, + MLX5_SGMT_TYPE_FULL_SRQC, + MLX5_SGMT_TYPE_FULL_CQC, + MLX5_SGMT_TYPE_FULL_EQC, + MLX5_SGMT_TYPE_FULL_QPC, + MLX5_SGMT_TYPE_SND_BUFF, + MLX5_SGMT_TYPE_RCV_BUFF, + MLX5_SGMT_TYPE_SRQ_BUFF, + MLX5_SGMT_TYPE_CQ_BUFF, + MLX5_SGMT_TYPE_EQ_BUFF, + MLX5_SGMT_TYPE_SX_SLICE, + MLX5_SGMT_TYPE_SX_SLICE_ALL, + MLX5_SGMT_TYPE_RDB, + MLX5_SGMT_TYPE_RX_SLICE_ALL, + MLX5_SGMT_TYPE_MENU, + MLX5_SGMT_TYPE_TERMINATE, + + MLX5_SGMT_TYPE_NUM, /* Keep last */ +}; + +struct mlx5_rsc_key { + enum mlx5_sgmt_type rsc; + int index1; + int index2; + int num_of_obj1; + int num_of_obj2; + int size; +}; + +#define MLX5_RSC_DUMP_ALL 0xFFFF +struct mlx5_rsc_dump_cmd; +struct mlx5_rsc_dump; + +struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev); +void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev); + +int mlx5_rsc_dump_init(struct mlx5_core_dev *dev); +void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev); + +struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev, + struct mlx5_rsc_key *key); +void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd); + +int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page, int *size); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c9606b8ab6ef..12a61bf82c14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -204,7 +204,7 @@ struct mlx5e_tx_wqe { struct mlx5e_rx_wqe_ll { struct mlx5_wqe_srq_next_seg next; - struct mlx5_wqe_data_seg data[0]; + struct mlx5_wqe_data_seg data[]; }; struct mlx5e_rx_wqe_cyc { @@ -738,7 +738,6 @@ struct mlx5e_channel { DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; - cpumask_var_t xps_cpumask; }; struct mlx5e_channels { @@ -814,6 +813,15 @@ struct mlx5e_xsk { bool ever_used; }; +/* Temporary storage for variables that are allocated when struct mlx5e_priv is + * initialized, and used where we can't allocate them because that functions + * must not fail. Use with care and make sure the same variable is not used + * simultaneously by multiple users. + */ +struct mlx5e_scratchpad { + cpumask_var_t cpumask; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -873,10 +881,12 @@ struct mlx5e_priv { #endif struct devlink_health_reporter *tx_reporter; struct devlink_health_reporter *rx_reporter; + struct devlink_port dl_port; struct mlx5e_xsk xsk; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) struct mlx5e_hv_vhca_stats_agent stats_agent; #endif + struct mlx5e_scratchpad scratchpad; }; struct mlx5e_profile { @@ -1036,14 +1046,22 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); void mlx5e_close_channels(struct mlx5e_channels *chs); -/* Function pointer to be used to modify WH settings while +/* Function pointer to be used to modify HW or kernel settings while * switching channels */ -typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); +typedef int (*mlx5e_fp_preactivate)(struct mlx5e_priv *priv, void *context); +#define MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(fn) \ +int fn##_ctx(struct mlx5e_priv *priv, void *context) \ +{ \ + return fn(priv); \ +} int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify); + mlx5e_fp_preactivate preactivate, + void *context); +int mlx5e_num_channels_changed(struct mlx5e_priv *priv); +int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); @@ -1124,10 +1142,10 @@ void mlx5e_update_ndo_stats(struct mlx5e_priv *priv); void mlx5e_queue_update_stats(struct mlx5e_priv *priv); int mlx5e_bits_invert(unsigned long a, int size); -typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv); int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv); +int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context); int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, - change_hw_mtu_cb set_mtu_cb); + mlx5e_fp_preactivate preactivate); /* ethtool helpers */ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -1153,6 +1171,12 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, struct ethtool_link_ksettings *link_ksettings); int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, const struct ethtool_link_ksettings *link_ksettings); +int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, + const u8 hfunc); +int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs); +int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd); u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c new file mode 100644 index 000000000000..f8b2de4b04be --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "en/devlink.h" + +int mlx5e_devlink_port_register(struct mlx5e_priv *priv) +{ + struct devlink *devlink = priv_to_devlink(priv->mdev); + + if (mlx5_core_is_pf(priv->mdev)) + devlink_port_attrs_set(&priv->dl_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + PCI_FUNC(priv->mdev->pdev->devfn), + false, 0, + NULL, 0); + else + devlink_port_attrs_set(&priv->dl_port, + DEVLINK_PORT_FLAVOUR_VIRTUAL, + 0, false, 0, NULL, 0); + + return devlink_port_register(devlink, &priv->dl_port, 1); +} + +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) +{ + devlink_port_type_eth_set(&priv->dl_port, priv->netdev); +} + +void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) +{ + devlink_port_unregister(&priv->dl_port); +} + +struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return &priv->dl_port; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h new file mode 100644 index 000000000000..83123a801adc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_EN_DEVLINK_H +#define __MLX5E_EN_DEVLINK_H + +#include <net/devlink.h> +#include "en.h" + +int mlx5e_devlink_port_register(struct mlx5e_priv *priv); +void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv); +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv); +struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 20b907dc1e29..3a199a03d929 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -3,6 +3,7 @@ #include "health.h" #include "lib/eq.h" +#include "lib/mlx5.h" int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) { @@ -197,10 +198,114 @@ int mlx5e_health_report(struct mlx5e_priv *priv, struct devlink_health_reporter *reporter, char *err_str, struct mlx5e_err_ctx *err_ctx) { - netdev_err(priv->netdev, err_str); + netdev_err(priv->netdev, "%s\n", err_str); if (!reporter) return err_ctx->recover(err_ctx->ctx); return devlink_health_report(reporter, err_str, err_ctx); } + +#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024 +static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, + const void *value, u32 value_len) + +{ + u32 data_size; + u32 offset; + int err; + + for (offset = 0; offset < value_len; offset += data_size) { + data_size = value_len - offset; + if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE) + data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE; + err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); + if (err) + break; + } + return err; +} + +int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_rsc_dump_cmd *cmd; + struct page *page; + int cmd_err, err; + int end_err; + int size; + + if (IS_ERR_OR_NULL(mdev->rsc_dump)) + return -EOPNOTSUPP; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + err = devlink_fmsg_binary_pair_nest_start(fmsg, "data"); + if (err) + return err; + + cmd = mlx5_rsc_dump_cmd_create(mdev, key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + + do { + cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); + if (cmd_err < 0) { + err = cmd_err; + goto destroy_cmd; + } + + err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size); + if (err) + goto destroy_cmd; + + } while (cmd_err > 0); + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); + end_err = devlink_fmsg_binary_pair_nest_end(fmsg); + if (end_err) + err = end_err; +free_page: + __free_page(page); + return err; +} + +int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + int queue_idx, char *lbl) +{ + struct mlx5_rsc_key key = {}; + int err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = queue_idx; + key.size = PAGE_SIZE; + key.num_of_obj1 = 1; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx); + if (err) + return err; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index e54f70d9af22..38f97f79ef16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -5,6 +5,7 @@ #define __MLX5E_EN_HEALTH_H #include "en.h" +#include "diag/rsc_dump.h" #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) @@ -35,6 +36,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); struct mlx5e_err_ctx { int (*recover)(void *ctx); + int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx); void *ctx; }; @@ -47,6 +49,8 @@ int mlx5e_health_report(struct mlx5e_priv *priv, int mlx5e_health_create_reporters(struct mlx5e_priv *priv); void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv); void mlx5e_health_channels_update(struct mlx5e_priv *priv); - - +int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, + struct devlink_fmsg *fmsg); +int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + int queue_idx, char *lbl); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c new file mode 100644 index 000000000000..ea321e528749 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include <linux/jhash.h> +#include <linux/slab.h> +#include <linux/xarray.h> +#include <linux/hashtable.h> + +#include "mapping.h" + +#define MAPPING_GRACE_PERIOD 2000 + +struct mapping_ctx { + struct xarray xarray; + DECLARE_HASHTABLE(ht, 8); + struct mutex lock; /* Guards hashtable and xarray */ + unsigned long max_id; + size_t data_size; + bool delayed_removal; + struct delayed_work dwork; + struct list_head pending_list; + spinlock_t pending_list_lock; /* Guards pending list */ +}; + +struct mapping_item { + struct rcu_head rcu; + struct list_head list; + unsigned long timeout; + struct hlist_node node; + int cnt; + u32 id; + char data[]; +}; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id) +{ + struct mapping_item *mi; + int err = -ENOMEM; + u32 hash_key; + + mutex_lock(&ctx->lock); + + hash_key = jhash(data, ctx->data_size, 0); + hash_for_each_possible(ctx->ht, mi, node, hash_key) { + if (!memcmp(data, mi->data, ctx->data_size)) + goto attach; + } + + mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL); + if (!mi) + goto err_alloc; + + memcpy(mi->data, data, ctx->data_size); + hash_add(ctx->ht, &mi->node, hash_key); + + err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id), + GFP_KERNEL); + if (err) + goto err_assign; +attach: + ++mi->cnt; + *id = mi->id; + + mutex_unlock(&ctx->lock); + + return 0; + +err_assign: + hash_del(&mi->node); + kfree(mi); +err_alloc: + mutex_unlock(&ctx->lock); + + return err; +} + +static void mapping_remove_and_free(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + xa_erase(&ctx->xarray, mi->id); + kfree_rcu(mi, rcu); +} + +static void mapping_free_item(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + if (!ctx->delayed_removal) { + mapping_remove_and_free(ctx, mi); + return; + } + + mi->timeout = jiffies + msecs_to_jiffies(MAPPING_GRACE_PERIOD); + + spin_lock(&ctx->pending_list_lock); + list_add_tail(&mi->list, &ctx->pending_list); + spin_unlock(&ctx->pending_list_lock); + + schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD); +} + +int mapping_remove(struct mapping_ctx *ctx, u32 id) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + mutex_lock(&ctx->lock); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto out; + err = 0; + + if (--mi->cnt > 0) + goto out; + + hash_del(&mi->node); + mapping_free_item(ctx, mi); +out: + mutex_unlock(&ctx->lock); + + return err; +} + +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + rcu_read_lock(); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto err_find; + + memcpy(data, mi->data, ctx->data_size); + err = 0; + +err_find: + rcu_read_unlock(); + return err; +} + +static void +mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list) +{ + struct mapping_item *mi; + + list_for_each_entry(mi, list, list) + mapping_remove_and_free(ctx, mi); +} + +static void mapping_work_handler(struct work_struct *work) +{ + unsigned long min_timeout = 0, now = jiffies; + struct mapping_item *mi, *next; + LIST_HEAD(pending_items); + struct mapping_ctx *ctx; + + ctx = container_of(work, struct mapping_ctx, dwork.work); + + spin_lock(&ctx->pending_list_lock); + list_for_each_entry_safe(mi, next, &ctx->pending_list, list) { + if (time_after(now, mi->timeout)) + list_move(&mi->list, &pending_items); + else if (!min_timeout || + time_before(mi->timeout, min_timeout)) + min_timeout = mi->timeout; + } + spin_unlock(&ctx->pending_list_lock); + + mapping_remove_and_free_list(ctx, &pending_items); + + if (min_timeout) + schedule_delayed_work(&ctx->dwork, abs(min_timeout - now)); +} + +static void mapping_flush_work(struct mapping_ctx *ctx) +{ + if (!ctx->delayed_removal) + return; + + cancel_delayed_work_sync(&ctx->dwork); + mapping_remove_and_free_list(ctx, &ctx->pending_list); +} + +struct mapping_ctx * +mapping_create(size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->max_id = max_id ? max_id : UINT_MAX; + ctx->data_size = data_size; + + if (delayed_removal) { + INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler); + INIT_LIST_HEAD(&ctx->pending_list); + spin_lock_init(&ctx->pending_list_lock); + ctx->delayed_removal = true; + } + + mutex_init(&ctx->lock); + xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1); + + return ctx; +} + +void mapping_destroy(struct mapping_ctx *ctx) +{ + mapping_flush_work(ctx); + xa_destroy(&ctx->xarray); + mutex_destroy(&ctx->lock); + + kfree(ctx); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h new file mode 100644 index 000000000000..285525cc5470 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_MAPPING_H__ +#define __MLX5_MAPPING_H__ + +struct mapping_ctx; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id); +int mapping_remove(struct mapping_ctx *ctx, u32 id); +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data); + +/* mapping uses an xarray to map data to ids in add(), and for find(). + * For locking, it uses a internal xarray spin lock for add()/remove(), + * find() uses rcu_read_lock(). + * Choosing delayed_removal postpones the removal of a previously mapped + * id by MAPPING_GRACE_PERIOD milliseconds. + * This is to avoid races against hardware, where we mark the packet in + * hardware with a previous id, and quick remove() and add() reusing the same + * previous id. Then find() will get the new mapping instead of the old + * which was used to mark the packet. + */ +struct mapping_ctx *mapping_create(size_t data_size, u32 max_id, + bool delayed_removal); +void mapping_destroy(struct mapping_ctx *ctx); + +#endif /* __MLX5_MAPPING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index fce6eccdcf8b..2c4a670c8ffd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -343,64 +343,76 @@ out: return err; } -static u32 fec_supported_speeds[] = { - 10000, - 40000, - 25000, - 50000, - 56000, - 100000 +enum mlx5e_fec_supported_link_mode { + MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G, + MLX5E_FEC_SUPPORTED_LINK_MODES_25G, + MLX5E_FEC_SUPPORTED_LINK_MODES_50G, + MLX5E_FEC_SUPPORTED_LINK_MODES_56G, + MLX5E_FEC_SUPPORTED_LINK_MODES_100G, + MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X, + MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X, + MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X, + MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X, + MLX5E_MAX_FEC_SUPPORTED_LINK_MODE, }; -#define MLX5E_FEC_SUPPORTED_SPEEDS ARRAY_SIZE(fec_supported_speeds) +#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X + +#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \ + do { \ + u16 *_policy = &(policy); \ + u32 *_buf = buf; \ + \ + if (write) \ + MLX5_SET(pplm_reg, _buf, fec_override_admin_##link, *_policy); \ + else \ + *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \ + } while (0) + +#define MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(buf, policy, write, link) \ + do { \ + u16 *__policy = &(policy); \ + bool _write = (write); \ + \ + if (_write && *__policy) \ + *__policy = find_first_bit((u_long *)__policy, \ + sizeof(u16) * BITS_PER_BYTE);\ + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, *__policy, _write, link); \ + if (!_write && *__policy) \ + *__policy = 1 << *__policy; \ + } while (0) /* get/set FEC admin field for a given speed */ -static int mlx5e_fec_admin_field(u32 *pplm, - u8 *fec_policy, - bool write, - u32 speed) +static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, + enum mlx5e_fec_supported_link_mode link_mode) { - switch (speed) { - case 10000: - case 40000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_10g_40g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_10g_40g, *fec_policy); + switch (link_mode) { + case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 10g_40g); break; - case 25000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_25g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_25g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_25G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 25g); break; - case 50000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_50g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_50g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_50G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g); break; - case 56000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_56g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_56g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_56G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 56g); break; - case 100000: - if (!write) - *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_admin_100g); - else - MLX5_SET(pplm_reg, pplm, - fec_override_admin_100g, *fec_policy); + case MLX5E_FEC_SUPPORTED_LINK_MODES_100G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 50g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 100g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 200g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: + MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 400g_8x); break; default: return -EINVAL; @@ -408,32 +420,40 @@ static int mlx5e_fec_admin_field(u32 *pplm, return 0; } +#define MLX5E_GET_FEC_OVERRIDE_CAP(buf, link) \ + MLX5_GET(pplm_reg, buf, fec_override_cap_##link) + /* returns FEC capabilities for a given speed */ -static int mlx5e_get_fec_cap_field(u32 *pplm, - u8 *fec_cap, - u32 speed) +static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, + enum mlx5e_fec_supported_link_mode link_mode) { - switch (speed) { - case 10000: - case 40000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_10g_40g); + switch (link_mode) { + case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 10g_40g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_25G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 25g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_50G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_56G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 56g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_100G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g); break; - case 25000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_25g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g_1x); break; - case 50000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_50g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g_2x); break; - case 56000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_56g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_4x); break; - case 100000: - *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_cap_100g); + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_8x); break; default: return -EINVAL; @@ -441,13 +461,14 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, return 0; } -int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps) +bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy) { + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); - u32 current_fec_speed; int err; + int i; if (!MLX5_CAP_GEN(dev, pcam_reg)) return -EOPNOTSUPP; @@ -458,23 +479,30 @@ int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps) MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); if (err) - return err; + return false; - err = mlx5e_port_linkspeed(dev, ¤t_fec_speed); - if (err) - return err; + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + u16 fec_caps; - return mlx5e_get_fec_cap_field(out, fec_caps, current_fec_speed); + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; + + mlx5e_get_fec_cap_field(out, &fec_caps, i); + if (fec_caps & fec_policy) + return true; + } + return false; } int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, - u8 *fec_configured_mode) + u16 *fec_configured_mode) { + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); - u32 link_speed; int err; + int i; if (!MLX5_CAP_GEN(dev, pcam_reg)) return -EOPNOTSUPP; @@ -490,24 +518,28 @@ int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active); if (!fec_configured_mode) - return 0; + goto out; - err = mlx5e_port_linkspeed(dev, &link_speed); - if (err) - return err; + *fec_configured_mode = 0; + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; - return mlx5e_fec_admin_field(out, fec_configured_mode, 0, link_speed); + mlx5e_fec_admin_field(out, fec_configured_mode, 0, i); + if (*fec_configured_mode != 0) + goto out; + } +out: + return 0; } -int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) { - u8 fec_policy_nofec = BIT(MLX5E_FEC_NOFEC); - bool fec_mode_not_supp_in_speed = false; + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); - u8 fec_policy_auto = 0; - u8 fec_caps = 0; + u16 fec_policy_auto = 0; int err; int i; @@ -517,6 +549,9 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) if (!MLX5_CAP_PCAM_REG(dev, pplm)) return -EOPNOTSUPP; + if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane) + return -EOPNOTSUPP; + MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); if (err) @@ -524,25 +559,31 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) MLX5_SET(pplm_reg, out, local_port, 1); - for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS; i++) { - mlx5e_get_fec_cap_field(out, &fec_caps, fec_supported_speeds[i]); - /* policy supported for link speed, or policy is auto */ - if (fec_caps & fec_policy || fec_policy == fec_policy_auto) { - mlx5e_fec_admin_field(out, &fec_policy, 1, - fec_supported_speeds[i]); - } else { - /* turn off FEC if supported. Else, leave it the same */ - if (fec_caps & fec_policy_nofec) - mlx5e_fec_admin_field(out, &fec_policy_nofec, 1, - fec_supported_speeds[i]); - fec_mode_not_supp_in_speed = true; - } - } + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + u16 conf_fec = fec_policy; + u16 fec_caps = 0; + + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; - if (fec_mode_not_supp_in_speed) - mlx5_core_dbg(dev, - "FEC policy 0x%x is not supported for some speeds", - fec_policy); + /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 + * to link modes up to 25G per lane and to + * MLX5E_FEC_RS_544_514 in the new link modes based on + * 50 G per lane + */ + if (conf_fec == (1 << MLX5E_FEC_RS_528_514) && + i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) + conf_fec = (1 << MLX5E_FEC_RS_544_514); + + mlx5e_get_fec_cap_field(out, &fec_caps, i); + + /* policy supported for link speed */ + if (fec_caps & conf_fec) + mlx5e_fec_admin_field(out, &conf_fec, 1, i); + else + /* set FEC to auto*/ + mlx5e_fec_admin_field(out, &fec_policy_auto, 1, i); + } return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 4a7f4497692b..a2ddd446dd59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -60,15 +60,17 @@ int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); -int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps); +bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy); int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, - u8 *fec_configured_mode); -int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy); + u16 *fec_configured_mode); +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy); enum { MLX5E_FEC_NOFEC, MLX5E_FEC_FIRECODE, MLX5E_FEC_RS_528_514, + MLX5E_FEC_RS_544_514 = 7, + MLX5E_FEC_LLRS_272_257_1 = 9, }; #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index a01e2de2488f..c209579fc213 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -102,19 +102,6 @@ out: return err; } -void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) -{ - struct mlx5e_priv *priv = icosq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {}; - - err_ctx.ctx = icosq; - err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; - sprintf(err_str, "ERR CQE on ICOSQ: 0x%x", icosq->sqn); - - mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); -} - static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) { struct net_device *dev = rq->netdev; @@ -171,19 +158,6 @@ out: return err; } -void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) -{ - struct mlx5e_priv *priv = rq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {}; - - err_ctx.ctx = rq; - err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; - sprintf(err_str, "ERR CQE on RQ: 0x%x", rq->rqn); - - mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); -} - static int mlx5e_rx_reporter_timeout_recover(void *ctx) { struct mlx5e_icosq *icosq; @@ -201,21 +175,6 @@ static int mlx5e_rx_reporter_timeout_recover(void *ctx) return err; } -void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) -{ - struct mlx5e_icosq *icosq = &rq->channel->icosq; - struct mlx5e_priv *priv = rq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {}; - - err_ctx.ctx = rq; - err_ctx.recover = mlx5e_rx_reporter_timeout_recover; - sprintf(err_str, "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n", - icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn); - - mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); -} - static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) { return err_ctx->recover(err_ctx->ctx); @@ -371,10 +330,235 @@ unlock: return err; } +static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_txqsq *icosq = ctx; + struct mlx5_rsc_key key = {}; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "ICOSQ"); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = icosq->sqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_SND_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_reporter_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5_rsc_key key = {}; + struct mlx5e_rq *rq = ctx; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "RX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ"); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = rq->rqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "receive_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_RCV_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_reporter_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5_rsc_key key = {}; + int i, err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "RX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); + if (err) + return err; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_rq *rq = &priv->channels.c[i]->rq; + + err = mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ"); + if (err) + return err; + } + + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv, + struct mlx5e_err_ctx *err_ctx, + struct devlink_fmsg *fmsg) +{ + return err_ctx->dump(priv, fmsg, err_ctx->ctx); +} + +static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : + mlx5e_rx_reporter_dump_all_rqs(priv, fmsg); +} + +void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *icosq = &rq->channel->icosq; + struct mlx5e_priv *priv = rq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_timeout_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_rq; + snprintf(err_str, sizeof(err_str), + "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x", + icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) +{ + struct mlx5e_priv *priv = rq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_rq; + snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) +{ + struct mlx5e_priv *priv = icosq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = icosq; + err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_icosq; + snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, .diagnose = mlx5e_rx_reporter_diagnose, + .dump = mlx5e_rx_reporter_dump, }; #define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 @@ -387,7 +571,7 @@ int mlx5e_reporter_rx_create(struct mlx5e_priv *priv) reporter = devlink_health_reporter_create(devlink, &mlx5_rx_reporter_ops, MLX5E_REPORTER_RX_GRACEFUL_PERIOD, - true, priv); + priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", PTR_ERR(reporter)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index b468549e96ff..9805fc085512 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -82,19 +82,6 @@ out: return err; } -void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) -{ - struct mlx5e_priv *priv = sq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx = {0}; - - err_ctx.ctx = sq; - err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; - sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn); - - mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); -} - static int mlx5e_tx_reporter_timeout_recover(void *ctx) { struct mlx5_eq_comp *eq; @@ -110,22 +97,6 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) return err; } -int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) -{ - struct mlx5e_priv *priv = sq->channel->priv; - char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; - struct mlx5e_err_ctx err_ctx; - - err_ctx.ctx = sq; - err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - sprintf(err_str, - "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", - sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, - jiffies_to_usecs(jiffies - sq->txq->trans_start)); - - return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); -} - /* state lock cannot be grabbed within this function. * It can cause a dead lock or a read-after-free. */ @@ -275,10 +246,162 @@ unlock: return err; } +static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5_rsc_key key = {}; + struct mlx5e_txqsq *sq = ctx; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ"); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = sq->sqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_SND_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_reporter_named_obj_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5_rsc_key key = {}; + int i, tc, err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); + if (err) + return err; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + for (tc = 0; tc < priv->channels.params.num_tc; tc++) { + struct mlx5e_txqsq *sq = &c->sq[tc]; + + err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); + if (err) + return err; + } + } + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, + struct mlx5e_err_ctx *err_ctx, + struct devlink_fmsg *fmsg) +{ + return err_ctx->dump(priv, fmsg, err_ctx->ctx); +} + +static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : + mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); +} + +void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) +{ + struct mlx5e_priv *priv = sq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + err_ctx.dump = mlx5e_tx_reporter_dump_sq; + snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); + + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); +} + +int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) +{ + struct mlx5e_priv *priv = sq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + err_ctx.dump = mlx5e_tx_reporter_dump_sq; + snprintf(err_str, sizeof(err_str), + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", + sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - sq->txq->trans_start)); + + return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); +} + static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { .name = "tx", .recover = mlx5e_tx_reporter_recover, .diagnose = mlx5e_tx_reporter_diagnose, + .dump = mlx5e_tx_reporter_dump, }; #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 @@ -293,7 +416,7 @@ int mlx5e_reporter_tx_create(struct mlx5e_priv *priv) reporter = devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, - true, priv); + priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c new file mode 100644 index 000000000000..ad3e3a65d403 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -0,0 +1,1369 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_acct.h> +#include <uapi/linux/tc_act/tc_pedit.h> +#include <net/tc_act/tc_ct.h> +#include <net/flow_offload.h> +#include <net/netfilter/nf_flow_table.h> +#include <linux/workqueue.h> + +#include "esw/chains.h" +#include "en/tc_ct.h" +#include "en.h" +#include "en_tc.h" +#include "en_rep.h" + +#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8) +#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) +#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) +#define MLX5_CT_STATE_TRK_BIT BIT(2) + +#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) +#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) +#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX + +#define ct_dbg(fmt, args...)\ + netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) + +struct mlx5_tc_ct_priv { + struct mlx5_eswitch *esw; + const struct net_device *netdev; + struct idr fte_ids; + struct idr tuple_ids; + struct rhashtable zone_ht; + struct mlx5_flow_table *ct; + struct mlx5_flow_table *ct_nat; + struct mlx5_flow_table *post_ct; + struct mutex control_lock; /* guards parallel adds/dels */ +}; + +struct mlx5_ct_flow { + struct mlx5_esw_flow_attr pre_ct_attr; + struct mlx5_esw_flow_attr post_ct_attr; + struct mlx5_flow_handle *pre_ct_rule; + struct mlx5_flow_handle *post_ct_rule; + struct mlx5_ct_ft *ft; + u32 fte_id; + u32 chain_mapping; +}; + +struct mlx5_ct_zone_rule { + struct mlx5_flow_handle *rule; + struct mlx5_esw_flow_attr attr; + int tupleid; + bool nat; +}; + +struct mlx5_ct_ft { + struct rhash_head node; + u16 zone; + refcount_t refcount; + struct nf_flowtable *nf_ft; + struct mlx5_tc_ct_priv *ct_priv; + struct rhashtable ct_entries_ht; + struct list_head ct_entries_list; +}; + +struct mlx5_ct_entry { + struct list_head list; + u16 zone; + struct rhash_head node; + struct flow_rule *flow_rule; + struct mlx5_fc *counter; + unsigned long lastuse; + unsigned long cookie; + unsigned long restore_cookie; + struct mlx5_ct_zone_rule zone_rules[2]; +}; + +static const struct rhashtable_params cts_ht_params = { + .head_offset = offsetof(struct mlx5_ct_entry, node), + .key_offset = offsetof(struct mlx5_ct_entry, cookie), + .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie), + .automatic_shrinking = true, + .min_size = 16 * 1024, +}; + +static const struct rhashtable_params zone_params = { + .head_offset = offsetof(struct mlx5_ct_ft, node), + .key_offset = offsetof(struct mlx5_ct_ft, zone), + .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone), + .automatic_shrinking = true, +}; + +static struct mlx5_tc_ct_priv * +mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + return uplink_priv->ct_priv; +} + +static int +mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec, + struct flow_rule *rule) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + u16 addr_type = 0; + u8 ip_proto = 0; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(match.mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(match.key->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + match.mask->ip_proto); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + match.key->ip_proto); + + ip_proto = match.key->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.key->src, sizeof(match.key->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.key->dst, sizeof(match.key->dst)); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, sizeof(match.key->src)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, sizeof(match.key->dst)); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + switch (ip_proto) { + case IPPROTO_TCP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_dport, ntohs(match.key->dst)); + break; + + case IPPROTO_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(match.key->dst)); + break; + default: + break; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + + flow_rule_match_tcp(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(match.mask->flags)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, + ntohs(match.key->flags)); + } + + return 0; +} + +static void +mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry, + bool nat) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_esw_flow_attr *attr = &zone_rule->attr; + struct mlx5_eswitch *esw = ct_priv->esw; + + ct_dbg("Deleting ct entry rule in zone %d", entry->zone); + + mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr); + mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); + idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); +} + +static void +mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry) +{ + mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); + + mlx5_fc_destroy(ct_priv->esw->dev, entry->counter); +} + +static struct flow_action_entry * +mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) +{ + struct flow_action *flow_action = &flow_rule->action; + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, flow_action) { + if (act->id == FLOW_ACTION_CT_METADATA) + return act; + } + + return NULL; +} + +static int +mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + u8 ct_state, + u32 mark, + u32 label, + u32 tupleid) +{ + struct mlx5_eswitch *esw = ct_priv->esw; + int err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + CTSTATE_TO_REG, ct_state); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + MARK_TO_REG, mark); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + LABELS_TO_REG, label); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, + TUPLEID_TO_REG, tupleid); + if (err) + return err; + + return 0; +} + +static int +mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, + char *modact) +{ + u32 offset = act->mangle.offset, field; + + switch (act->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct iphdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; + else if (offset == offsetof(struct iphdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct ipv6hdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, saddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, saddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, saddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; + else if (offset == offsetof(struct ipv6hdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, daddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, daddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, daddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct tcphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; + else if (offset == offsetof(struct tcphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct udphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; + else if (offset == offsetof(struct udphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; + else + return -EOPNOTSUPP; + break; + + default: + return -EOPNOTSUPP; + } + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, offset, 0); + MLX5_SET(set_action_in, modact, field, field); + MLX5_SET(set_action_in, modact, data, act->mangle.val); + + return 0; +} + +static int +mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + struct flow_action *flow_action = &flow_rule->action; + struct mlx5_core_dev *mdev = ct_priv->esw->dev; + struct flow_action_entry *act; + size_t action_size; + char *modact; + int err, i; + + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_MANGLE: { + err = alloc_mod_hdr_actions(mdev, + MLX5_FLOW_NAMESPACE_FDB, + mod_acts); + if (err) + return err; + + modact = mod_acts->actions + + mod_acts->num_actions * action_size; + + err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); + if (err) + return err; + + mod_acts->num_actions++; + } + break; + + case FLOW_ACTION_CT_METADATA: + /* Handled earlier */ + continue; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int +mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_esw_flow_attr *attr, + struct flow_rule *flow_rule, + u32 tupleid, + bool nat) +{ + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_modify_hdr *mod_hdr; + struct flow_action_entry *meta; + int err; + + meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta) + return -EOPNOTSUPP; + + if (meta->ct_metadata.labels[1] || + meta->ct_metadata.labels[2] || + meta->ct_metadata.labels[3]) { + ct_dbg("Failed to offload ct entry due to unsupported label"); + return -EOPNOTSUPP; + } + + if (nat) { + err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, + &mod_acts); + if (err) + goto err_mapping; + } + + err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, + (MLX5_CT_STATE_ESTABLISHED_BIT | + MLX5_CT_STATE_TRK_BIT), + meta->ct_metadata.mark, + meta->ct_metadata.labels[0], + tupleid); + if (err) + goto err_mapping; + + mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts.num_actions, + mod_acts.actions); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mapping; + } + attr->modify_hdr = mod_hdr; + + dealloc_mod_hdr_actions(&mod_acts); + return 0; + +err_mapping: + dealloc_mod_hdr_actions(&mod_acts); + return err; +} + +static int +mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + bool nat) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_esw_flow_attr *attr = &zone_rule->attr; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_flow_spec *spec = NULL; + u32 tupleid = 1; + int err; + + zone_rule->nat = nat; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Get tuple unique id */ + err = idr_alloc_u32(&ct_priv->tuple_ids, zone_rule, &tupleid, + TUPLE_ID_MAX, GFP_KERNEL); + if (err) { + netdev_warn(ct_priv->netdev, + "Failed to allocate tuple id, err: %d\n", err); + goto err_idr_alloc; + } + zone_rule->tupleid = tupleid; + + err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, + tupleid, nat); + if (err) { + ct_dbg("Failed to create ct entry mod hdr"); + goto err_mod_hdr; + } + + attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = 0; + attr->dest_ft = ct_priv->post_ct; + attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct; + attr->outer_match_level = MLX5_MATCH_L4; + attr->counter = entry->counter; + attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + + mlx5_tc_ct_set_tuple_match(spec, flow_rule); + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + entry->zone & MLX5_CT_ZONE_MASK, + MLX5_CT_ZONE_MASK); + + zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (IS_ERR(zone_rule->rule)) { + err = PTR_ERR(zone_rule->rule); + ct_dbg("Failed to add ct entry rule, nat: %d", nat); + goto err_rule; + } + + kfree(spec); + ct_dbg("Offloaded ct entry rule in zone %d", entry->zone); + + return 0; + +err_rule: + mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); +err_mod_hdr: + idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); +err_idr_alloc: + kfree(spec); + return err; +} + +static int +mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry) +{ + struct mlx5_eswitch *esw = ct_priv->esw; + int err; + + entry->counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(entry->counter)) { + err = PTR_ERR(entry->counter); + ct_dbg("Failed to create counter for ct entry"); + return err; + } + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false); + if (err) + goto err_orig; + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true); + if (err) + goto err_nat; + + return 0; + +err_nat: + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); +err_orig: + mlx5_fc_destroy(esw->dev, entry->counter); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + struct flow_action_entry *meta_action; + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + int err; + + meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta_action) + return -EOPNOTSUPP; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (entry) + return 0; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->zone = ft->zone; + entry->flow_rule = flow_rule; + entry->cookie = flow->cookie; + entry->restore_cookie = meta_action->ct_metadata.cookie; + + err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry); + if (err) + goto err_rules; + + err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node, + cts_ht_params); + if (err) + goto err_insert; + + list_add(&entry->list, &ft->ct_entries_list); + + return 0; + +err_insert: + mlx5_tc_ct_entry_del_rules(ct_priv, entry); +err_rules: + kfree(entry); + netdev_warn(ct_priv->netdev, + "Failed to offload ct entry, err: %d\n", err); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (!entry) + return -ENOENT; + + mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry); + WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, + &entry->node, + cts_ht_params)); + list_del(&entry->list); + kfree(entry); + + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, + struct flow_cls_offload *f) +{ + unsigned long cookie = f->cookie; + struct mlx5_ct_entry *entry; + u64 lastuse, packets, bytes; + + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, + cts_ht_params); + if (!entry) + return -ENOENT; + + mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse, + FLOW_ACTION_HW_STATS_DELAYED); + + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload *f = type_data; + struct mlx5_ct_ft *ft = cb_priv; + + if (type != TC_SETUP_CLSFLOWER) + return -EOPNOTSUPP; + + switch (f->command) { + case FLOW_CLS_REPLACE: + return mlx5_tc_ct_block_flow_offload_add(ft, f); + case FLOW_CLS_DESTROY: + return mlx5_tc_ct_block_flow_offload_del(ft, f); + case FLOW_CLS_STATS: + return mlx5_tc_ct_block_flow_offload_stats(ft, f); + default: + break; + }; + + return -EOPNOTSUPP; +} + +int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct flow_dissector_key_ct *mask, *key; + bool trk, est, untrk, unest, new; + u32 ctstate = 0, ctstate_mask = 0; + u16 ct_state_on, ct_state_off; + u16 ct_state, ct_state_mask; + struct flow_match_ct match; + + if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + if (!ct_priv) { + NL_SET_ERR_MSG_MOD(extack, + "offload of ct matching isn't available"); + return -EOPNOTSUPP; + } + + flow_rule_match_ct(f->rule, &match); + + key = match.key; + mask = match.mask; + + ct_state = key->ct_state; + ct_state_mask = mask->ct_state; + + if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | + TCA_FLOWER_KEY_CT_FLAGS_NEW)) { + NL_SET_ERR_MSG_MOD(extack, + "only ct_state trk, est and new are supported for offload"); + return -EOPNOTSUPP; + } + + if (mask->ct_labels[1] || mask->ct_labels[2] || mask->ct_labels[3]) { + NL_SET_ERR_MSG_MOD(extack, + "only lower 32bits of ct_labels are supported for offload"); + return -EOPNOTSUPP; + } + + ct_state_on = ct_state & ct_state_mask; + ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; + trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; + est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + + ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + + if (new) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +new isn't supported"); + return -EOPNOTSUPP; + } + + if (mask->ct_zone) + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + key->ct_zone, MLX5_CT_ZONE_MASK); + if (ctstate_mask) + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, + ctstate, ctstate_mask); + if (mask->ct_mark) + mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, + key->ct_mark, mask->ct_mark); + if (mask->ct_labels[0]) + mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, + key->ct_labels[0], + mask->ct_labels[0]); + + return 0; +} + +int +mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + + if (!ct_priv) { + NL_SET_ERR_MSG_MOD(extack, + "offload of ct action isn't available"); + return -EOPNOTSUPP; + } + + attr->ct_attr.zone = act->ct.zone; + attr->ct_attr.ct_action = act->ct.action; + attr->ct_attr.nf_ft = act->ct.flow_table; + + return 0; +} + +static struct mlx5_ct_ft * +mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, + struct nf_flowtable *nf_ft) +{ + struct mlx5_ct_ft *ft; + int err; + + ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); + if (ft) { + refcount_inc(&ft->refcount); + return ft; + } + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + if (!ft) + return ERR_PTR(-ENOMEM); + + ft->zone = zone; + ft->nf_ft = nf_ft; + ft->ct_priv = ct_priv; + INIT_LIST_HEAD(&ft->ct_entries_list); + refcount_set(&ft->refcount, 1); + + err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); + if (err) + goto err_init; + + err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, + zone_params); + if (err) + goto err_insert; + + err = nf_flow_table_offload_add_cb(ft->nf_ft, + mlx5_tc_ct_block_flow_offload, ft); + if (err) + goto err_add_cb; + + return ft; + +err_add_cb: + rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); +err_insert: + rhashtable_destroy(&ft->ct_entries_ht); +err_init: + kfree(ft); + return ERR_PTR(err); +} + +static void +mlx5_tc_ct_flush_ft(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) +{ + struct mlx5_ct_entry *entry; + + list_for_each_entry(entry, &ft->ct_entries_list, list) + mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry); +} + +static void +mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) +{ + if (!refcount_dec_and_test(&ft->refcount)) + return; + + nf_flow_table_offload_del_cb(ft->nf_ft, + mlx5_tc_ct_block_flow_offload, ft); + mlx5_tc_ct_flush_ft(ct_priv, ft); + rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); + rhashtable_destroy(&ft->ct_entries_ht); + kfree(ft); +} + +/* We translate the tc filter with CT action to the following HW model: + * + * +-------------------+ +--------------------+ +--------------+ + * + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct +-----> + * + original match + | + tuple + zone match + | + fte_id match + | + * +-------------------+ | +--------------------+ | +--------------+ | + * v v v + * set chain miss mapping set mark original + * set fte_id set label filter + * set zone set established actions + * set tunnel_id do nat (if needed) + * do decap + */ +static int +__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *orig_spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_handle **flow_rule) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5_flow_spec *post_ct_spec = NULL; + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_esw_flow_attr *pre_ct_attr; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_ct_flow *ct_flow; + int chain_mapping = 0, err; + struct mlx5_ct_ft *ft; + u32 fte_id = 1; + + post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); + ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); + if (!post_ct_spec || !ct_flow) { + kfree(post_ct_spec); + kfree(ct_flow); + return -ENOMEM; + } + + /* Register for CT established events */ + ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, + attr->ct_attr.nf_ft); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + ct_dbg("Failed to register to ft callback"); + goto err_ft; + } + ct_flow->ft = ft; + + err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id, + MLX5_FTE_ID_MAX, GFP_KERNEL); + if (err) { + netdev_warn(priv->netdev, + "Failed to allocate fte id, err: %d\n", err); + goto err_idr; + } + ct_flow->fte_id = fte_id; + + /* Base esw attributes of both rules on original rule attribute */ + pre_ct_attr = &ct_flow->pre_ct_attr; + memcpy(pre_ct_attr, attr, sizeof(*attr)); + memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr)); + + /* Modify the original rule's action to fwd and modify, leave decap */ + pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; + pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + /* Write chain miss tag for miss in ct table as we + * don't go though all prios of this chain as normal tc rules + * miss. + */ + err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain, + &chain_mapping); + if (err) { + ct_dbg("Failed to get chain register mapping for chain"); + goto err_get_chain; + } + ct_flow->chain_mapping = chain_mapping; + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + CHAIN_TO_REG, chain_mapping); + if (err) { + ct_dbg("Failed to set chain register mapping"); + goto err_mapping; + } + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG, + attr->ct_attr.zone & + MLX5_CT_ZONE_MASK); + if (err) { + ct_dbg("Failed to set zone register mapping"); + goto err_mapping; + } + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + FTEID_TO_REG, fte_id); + if (err) { + ct_dbg("Failed to set fte_id register mapping"); + goto err_mapping; + } + + /* If original flow is decap, we do it before going into ct table + * so add a rewrite for the tunnel match_id. + */ + if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && + attr->chain == 0) { + u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); + + err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, + TUNNEL_TO_REG, + tun_id); + if (err) { + ct_dbg("Failed to set tunnel register mapping"); + goto err_mapping; + } + } + + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_FDB, + pre_mod_acts.num_actions, + pre_mod_acts.actions); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to create pre ct mod hdr"); + goto err_mapping; + } + pre_ct_attr->modify_hdr = mod_hdr; + + /* Post ct rule matches on fte_id and executes original rule's + * tc rule action + */ + mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, + fte_id, MLX5_FTE_ID_MASK); + + /* Put post_ct rule on post_ct fdb */ + ct_flow->post_ct_attr.chain = 0; + ct_flow->post_ct_attr.prio = 0; + ct_flow->post_ct_attr.fdb = ct_priv->post_ct; + + ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE; + ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE; + ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec, + &ct_flow->post_ct_attr); + ct_flow->post_ct_rule = rule; + if (IS_ERR(ct_flow->post_ct_rule)) { + err = PTR_ERR(ct_flow->post_ct_rule); + ct_dbg("Failed to add post ct rule"); + goto err_insert_post_ct; + } + + /* Change original rule point to ct table */ + pre_ct_attr->dest_chain = 0; + pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; + ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, + orig_spec, + pre_ct_attr); + if (IS_ERR(ct_flow->pre_ct_rule)) { + err = PTR_ERR(ct_flow->pre_ct_rule); + ct_dbg("Failed to add pre ct rule"); + goto err_insert_orig; + } + + attr->ct_attr.ct_flow = ct_flow; + *flow_rule = ct_flow->post_ct_rule; + dealloc_mod_hdr_actions(&pre_mod_acts); + kfree(post_ct_spec); + + return 0; + +err_insert_orig: + mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule, + &ct_flow->post_ct_attr); +err_insert_post_ct: + mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); +err_mapping: + dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); +err_get_chain: + idr_remove(&ct_priv->fte_ids, fte_id); +err_idr: + mlx5_tc_ct_del_ft_cb(ct_priv, ft); +err_ft: + kfree(post_ct_spec); + kfree(ct_flow); + netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); + return err; +} + +static int +__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *orig_spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + struct mlx5_flow_handle **flow_rule) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct mlx5_eswitch *esw = ct_priv->esw; + struct mlx5_esw_flow_attr *pre_ct_attr; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_ct_flow *ct_flow; + int err; + + ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); + if (!ct_flow) + return -ENOMEM; + + /* Base esw attributes on original rule attribute */ + pre_ct_attr = &ct_flow->pre_ct_attr; + memcpy(pre_ct_attr, attr, sizeof(*attr)); + + err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0); + if (err) { + ct_dbg("Failed to set register for ct clear"); + goto err_set_registers; + } + + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_FDB, + mod_acts->num_actions, + mod_acts->actions); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to add create ct clear mod hdr"); + goto err_set_registers; + } + + dealloc_mod_hdr_actions(mod_acts); + pre_ct_attr->modify_hdr = mod_hdr; + pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add ct clear rule"); + goto err_insert; + } + + attr->ct_attr.ct_flow = ct_flow; + ct_flow->pre_ct_rule = rule; + *flow_rule = rule; + + return 0; + +err_insert: + mlx5_modify_header_dealloc(priv->mdev, mod_hdr); +err_set_registers: + netdev_warn(priv->netdev, + "Failed to offload ct clear flow, err %d\n", err); + return err; +} + +struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct mlx5_flow_handle *rule; + int err; + + if (!ct_priv) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&ct_priv->control_lock); + if (clear_action) + err = __mlx5_tc_ct_flow_offload_clear(priv, flow, spec, attr, + mod_hdr_acts, &rule); + else + err = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr, + &rule); + mutex_unlock(&ct_priv->control_lock); + if (err) + return ERR_PTR(err); + + return rule; +} + +static void +__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_flow *ct_flow) +{ + struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr; + struct mlx5_eswitch *esw = ct_priv->esw; + + mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule, + pre_ct_attr); + mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr); + + if (ct_flow->post_ct_rule) { + mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule, + &ct_flow->post_ct_attr); + mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); + idr_remove(&ct_priv->fte_ids, ct_flow->fte_id); + mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); + } + + kfree(ct_flow); +} + +void +mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; + + /* We are called on error to clean up stuff from parsing + * but we don't have anything for now + */ + if (!ct_flow) + return; + + mutex_lock(&ct_priv->control_lock); + __mlx5_tc_ct_delete_flow(ct_priv, ct_flow); + mutex_unlock(&ct_priv->control_lock); +} + +static int +mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw, + const char **err_msg) +{ +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + /* cannot restore chain ID on HW miss */ + + *err_msg = "tc skb extension missing"; + return -EOPNOTSUPP; +#endif + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) { + *err_msg = "firmware level support is missing"; + return -EOPNOTSUPP; + } + + if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { + /* vlan workaround should be avoided for multi chain rules. + * This is just a sanity check as pop vlan action should + * be supported by any FW that supports ignore_flow_level + */ + + *err_msg = "firmware vlan actions support is missing"; + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, + fdb_modify_header_fwd_to_table)) { + /* CT always writes to registers which are mod header actions. + * Therefore, mod header and goto is required + */ + + *err_msg = "firmware fwd and modify support is missing"; + return -EOPNOTSUPP; + } + + if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + *err_msg = "register loopback isn't supported"; + return -EOPNOTSUPP; + } + + return 0; +} + +static void +mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err) +{ + if (msg) + netdev_warn(rpriv->netdev, + "tc ct offload not supported, %s, err: %d\n", + msg, err); + else + netdev_warn(rpriv->netdev, + "tc ct offload not supported, err: %d\n", + err); +} + +int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +{ + struct mlx5_tc_ct_priv *ct_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + const char *msg; + int err; + + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; + + err = mlx5_tc_ct_init_check_support(esw, &msg); + if (err) { + mlx5_tc_ct_init_err(rpriv, msg, err); + goto err_support; + } + + ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); + if (!ct_priv) { + mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM); + goto err_alloc; + } + + ct_priv->esw = esw; + ct_priv->netdev = rpriv->netdev; + ct_priv->ct = mlx5_esw_chains_create_global_table(esw); + if (IS_ERR(ct_priv->ct)) { + err = PTR_ERR(ct_priv->ct); + mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err); + goto err_ct_tbl; + } + + ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw); + if (IS_ERR(ct_priv->ct_nat)) { + err = PTR_ERR(ct_priv->ct_nat); + mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table", + err); + goto err_ct_nat_tbl; + } + + ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw); + if (IS_ERR(ct_priv->post_ct)) { + err = PTR_ERR(ct_priv->post_ct); + mlx5_tc_ct_init_err(rpriv, "failed to create post ct table", + err); + goto err_post_ct_tbl; + } + + idr_init(&ct_priv->fte_ids); + idr_init(&ct_priv->tuple_ids); + mutex_init(&ct_priv->control_lock); + rhashtable_init(&ct_priv->zone_ht, &zone_params); + + /* Done, set ct_priv to know it initializted */ + uplink_priv->ct_priv = ct_priv; + + return 0; + +err_post_ct_tbl: + mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat); +err_ct_nat_tbl: + mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct); +err_ct_tbl: + kfree(ct_priv); +err_alloc: +err_support: + + return 0; +} + +void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +{ + struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; + + if (!ct_priv) + return; + + mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct); + mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat); + mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct); + + rhashtable_destroy(&ct_priv->zone_ht); + mutex_destroy(&ct_priv->control_lock); + idr_destroy(&ct_priv->tuple_ids); + idr_destroy(&ct_priv->fte_ids); + kfree(ct_priv); + + uplink_priv->ct_priv = NULL; +} + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, + struct sk_buff *skb, u32 tupleid) +{ + struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; + struct mlx5_ct_zone_rule *zone_rule; + struct mlx5_ct_entry *entry; + + if (!ct_priv || !tupleid) + return true; + + zone_rule = idr_find(&ct_priv->tuple_ids, tupleid); + if (!zone_rule) + return false; + + entry = container_of(zone_rule, struct mlx5_ct_entry, + zone_rules[zone_rule->nat]); + tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h new file mode 100644 index 000000000000..091d305b633e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_CT_H__ +#define __MLX5_EN_TC_CT_H__ + +#include <net/pkt_cls.h> +#include <linux/mlx5/fs.h> +#include <net/tc_act/tc_ct.h> + +#include "en.h" + +struct mlx5_esw_flow_attr; +struct mlx5e_tc_mod_hdr_acts; +struct mlx5_rep_uplink_priv; +struct mlx5e_tc_flow; +struct mlx5e_priv; + +struct mlx5_ct_flow; + +struct nf_flowtable; + +struct mlx5_ct_attr { + u16 zone; + u16 ct_action; + struct mlx5_ct_flow *ct_flow; + struct nf_flowtable *nf_ft; +}; + +#define zone_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 0,\ + .mlen = 2,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2) + 2,\ +} + +#define ctstate_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 2,\ + .mlen = 2,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2),\ +} + +#define mark_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_3,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_3),\ +} + +#define labels_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_4,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_4),\ +} + +#define fteid_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\ + .moffset = 0,\ + .mlen = 4,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_5),\ +} + +#define tupleid_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\ + .moffset = 0,\ + .mlen = 3,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_1),\ +} + +#define TUPLE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[TUPLEID_TO_REG].mlen * 8) +#define TUPLE_ID_MAX GENMASK(TUPLE_ID_BITS - 1, 0) + +#if IS_ENABLED(CONFIG_MLX5_TC_CT) + +int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv); +void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv); + +int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack); +int +mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack); + +struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void +mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr); + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, + struct sk_buff *skb, u32 tupleid); + +#else /* CONFIG_MLX5_TC_CT */ + +static inline int +mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) +{ + return 0; +} + +static inline void +mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) +{ +} + +static inline int +mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct netlink_ext_ack *extack) +{ + if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); + return -EOPNOTSUPP; +} + +static inline int +mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, + struct mlx5_esw_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); + return -EOPNOTSUPP; +} + +static inline struct mlx5_flow_handle * +mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr) +{ +} + +static inline bool +mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, + struct sk_buff *skb, u32 tupleid) +{ + if (!tupleid) + return true; + + return false; +} + +#endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */ +#endif /* __MLX5_EN_TC_CT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index af4ebd2951b5..b45c3f46570b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -66,6 +66,9 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) return -EOPNOTSUPP; + if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev) + return -EOPNOTSUPP; + return 0; } @@ -469,10 +472,15 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, - void *headers_c, - void *headers_v, u8 *match_level) + u8 *match_level) { struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + struct netlink_ext_ack *extack = f->common.extack; int err = 0; if (!tunnel) { @@ -499,6 +507,109 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, goto out; } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_control match; + u16 addr_type; + + flow_rule_match_enc_control(rule, &match); + addr_type = match.key->addr_type; + + /* For tunnel addr_type used same key id`s as for non-tunnel */ + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.key->dst)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IP); + } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IPV6); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); + + if (match.mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB + (priv->mdev, + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); + err = -EOPNOTSUPP; + goto out; + } + } + + /* Enforce DMAC when offloading incoming tunneled flows. + * Flow counters require a match on the DMAC. + */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), priv->netdev->dev_addr); + + /* let software handle IP fragments */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + + return 0; + out: return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 6f9a78c85ffd..1630f0ec3ad7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -76,8 +76,7 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, - void *headers_c, - void *headers_v, u8 *match_level); + u8 *match_level); int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index cf58c9637904..29626c6c9c25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -433,7 +433,6 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) if (!ipsec) return; - drain_workqueue(ipsec->wq); destroy_workqueue(ipsec->wq); ida_destroy(&ipsec->halloc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 2c75b2752f58..014639ea06e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -175,28 +175,20 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, struct mlx5e_tir *tir = priv->indir_tir; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); - struct mlx5_flow_spec *spec; enum mlx5e_traffic_types tt; int err = 0; - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - err = -ENOMEM; - goto out; - } - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; tt = arfs_get_tt(type); if (tt == -EINVAL) { netdev_err(priv->netdev, "%s: bad arfs_type: %d\n", __func__, type); - err = -EINVAL; - goto out; + return -EINVAL; } dest.tir_num = tir[tt].tirn; - arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec, + arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL, &flow_act, &dest, 1); if (IS_ERR(arfs_t->default_rule)) { @@ -205,8 +197,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n", __func__, type); } -out: - kvfree(spec); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 01f2918063af..47874d34156b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1098,49 +1098,59 @@ void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) mlx5e_dcbnl_dscp_app(priv, DELETE); } -static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv, - struct mlx5e_params *params) +static void mlx5e_params_calc_trust_tx_min_inline_mode(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u8 trust_state) { - mlx5_query_min_inline(priv->mdev, ¶ms->tx_min_inline_mode); - if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP && + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); + if (trust_state == MLX5_QPTS_TRUST_DSCP && params->tx_min_inline_mode == MLX5_INLINE_MODE_L2) params->tx_min_inline_mode = MLX5_INLINE_MODE_IP; } -static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv) +static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context) +{ + u8 *trust_state = context; + int err; + + err = mlx5_set_trust_state(priv->mdev, *trust_state); + if (err) + return err; + priv->dcbx_dp.trust_state = *trust_state; + + return 0; +} + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) { struct mlx5e_channels new_channels = {}; + bool reset_channels = true; + int err = 0; mutex_lock(&priv->state_lock); new_channels.params = priv->channels.params; - mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params); + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_channels.params, + trust_state); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; - goto out; + reset_channels = false; } /* Skip if tx_min_inline is the same */ if (new_channels.params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode) - goto out; + reset_channels = false; - mlx5e_safe_switch_channels(priv, &new_channels, NULL); + if (reset_channels) + err = mlx5e_safe_switch_channels(priv, &new_channels, + mlx5e_update_trust_state_hw, + &trust_state); + else + err = mlx5e_update_trust_state_hw(priv, &trust_state); -out: mutex_unlock(&priv->state_lock); -} - -static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) -{ - int err; - - err = mlx5_set_trust_state(priv->mdev, trust_state); - if (err) - return err; - priv->dcbx_dp.trust_state = trust_state; - mlx5e_trust_update_sq_inline_mode(priv); return err; } @@ -1171,7 +1181,8 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv) if (err) return err; - mlx5e_trust_update_tx_min_inline_mode(priv, &priv->channels.params); + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params, + priv->dcbx_dp.trust_state); err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d674cb679895..6d703ddee4e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -357,7 +357,7 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, goto unlock; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); unlock: mutex_unlock(&priv->state_lock); @@ -432,9 +432,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { *cur_params = new_channels.params; - if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); + mlx5e_num_channels_changed(priv); goto out; } @@ -442,12 +440,9 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, if (arfs_enabled) mlx5e_arfs_disable(priv); - if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); - /* Switch to new channels, set new parameters and close old ones */ - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, + mlx5e_num_channels_changed_ctx, NULL); if (arfs_enabled) { int err2 = mlx5e_arfs_enable(priv); @@ -580,7 +575,7 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, goto out; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); out: mutex_unlock(&priv->state_lock); @@ -633,6 +628,8 @@ static const u32 pplm_fec_2_ethtool[] = { [MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF, [MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER, [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, + [MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS, + [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS, }; static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) @@ -650,45 +647,48 @@ static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) return 0; } -/* we use ETHTOOL_FEC_* offset and apply it to ETHTOOL_LINK_MODE_FEC_*_BIT */ -static u32 ethtool_fec2ethtool_caps(u_long ethtool_fec_code) -{ - u32 offset; - - offset = find_first_bit(ðtool_fec_code, sizeof(u32)); - offset -= ETHTOOL_FEC_OFF_BIT; - offset += ETHTOOL_LINK_MODE_FEC_NONE_BIT; +#define MLX5E_ADVERTISE_SUPPORTED_FEC(mlx5_fec, ethtool_fec) \ + do { \ + if (mlx5e_fec_in_caps(dev, 1 << (mlx5_fec))) \ + __set_bit(ethtool_fec, \ + link_ksettings->link_modes.supported);\ + } while (0) - return offset; -} +static const u32 pplm_fec_2_ethtool_linkmodes[] = { + [MLX5E_FEC_NOFEC] = ETHTOOL_LINK_MODE_FEC_NONE_BIT, + [MLX5E_FEC_FIRECODE] = ETHTOOL_LINK_MODE_FEC_BASER_BIT, + [MLX5E_FEC_RS_528_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT, + [MLX5E_FEC_RS_544_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT, + [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_LINK_MODE_FEC_LLRS_BIT, +}; static int get_fec_supported_advertised(struct mlx5_core_dev *dev, struct ethtool_link_ksettings *link_ksettings) { - u_long fec_caps = 0; - u32 active_fec = 0; - u32 offset; + u_long active_fec = 0; u32 bitn; int err; - err = mlx5e_get_fec_caps(dev, (u8 *)&fec_caps); + err = mlx5e_get_fec_mode(dev, (u32 *)&active_fec, NULL); if (err) return (err == -EOPNOTSUPP) ? 0 : err; - err = mlx5e_get_fec_mode(dev, &active_fec, NULL); - if (err) - return err; - - for_each_set_bit(bitn, &fec_caps, ARRAY_SIZE(pplm_fec_2_ethtool)) { - u_long ethtool_bitmask = pplm_fec_2_ethtool[bitn]; - - offset = ethtool_fec2ethtool_caps(ethtool_bitmask); - __set_bit(offset, link_ksettings->link_modes.supported); - } - - active_fec = pplm2ethtool_fec(active_fec, sizeof(u32) * BITS_PER_BYTE); - offset = ethtool_fec2ethtool_caps(active_fec); - __set_bit(offset, link_ksettings->link_modes.advertising); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_NOFEC, + ETHTOOL_LINK_MODE_FEC_NONE_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_FIRECODE, + ETHTOOL_LINK_MODE_FEC_BASER_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_RS_528_514, + ETHTOOL_LINK_MODE_FEC_RS_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_LLRS_272_257_1, + ETHTOOL_LINK_MODE_FEC_LLRS_BIT); + + /* active fec is a bit set, find out which bit is set and + * advertise the corresponding ethtool bit + */ + bitn = find_first_bit(&active_fec, sizeof(u32) * BITS_PER_BYTE); + if (bitn < ARRAY_SIZE(pplm_fec_2_ethtool_linkmodes)) + __set_bit(pplm_fec_2_ethtool_linkmodes[bitn], + link_ksettings->link_modes.advertising); return 0; } @@ -773,6 +773,7 @@ static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, bool force_legacy, + u16 data_rate_oper, struct ethtool_link_ksettings *link_ksettings) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -784,7 +785,10 @@ static void get_speed_duplex(struct net_device *netdev, speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy); if (!speed) { - speed = SPEED_UNKNOWN; + if (data_rate_oper) + speed = 100 * data_rate_oper; + else + speed = SPEED_UNKNOWN; goto out; } @@ -873,17 +877,18 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; - u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; - u32 rx_pause = 0; - u32 tx_pause = 0; - u32 eth_proto_cap; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; u32 eth_proto_admin; - u32 eth_proto_lp; - u32 eth_proto_oper; u8 an_disable_admin; - u8 an_status; + u16 data_rate_oper; + u32 eth_proto_oper; + u32 eth_proto_cap; u8 connector_type; + u32 rx_pause = 0; + u32 tx_pause = 0; + u32 eth_proto_lp; bool admin_ext; + u8 an_status; bool ext; int err; @@ -917,6 +922,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); an_status = MLX5_GET(ptys_reg, out, an_status); connector_type = MLX5_GET(ptys_reg, out, connector_type); + data_rate_oper = MLX5_GET(ptys_reg, out, data_rate_oper); mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); @@ -927,7 +933,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, admin_ext); get_speed_duplex(priv->netdev, eth_proto_oper, !admin_ext, - link_ksettings); + data_rate_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; @@ -1126,8 +1132,8 @@ static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) return mlx5e_ethtool_get_rxfh_indir_size(priv); } -static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, - u8 *hfunc) +int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rss_params *rss = &priv->rss_params; @@ -1146,8 +1152,8 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, return 0; } -static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, - const u8 *key, const u8 hfunc) +int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rss_params *rss = &priv->rss_params; @@ -1511,7 +1517,7 @@ static int mlx5e_get_fecparam(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - u8 fec_configured = 0; + u16 fec_configured = 0; u32 fec_active = 0; int err; @@ -1527,7 +1533,7 @@ static int mlx5e_get_fecparam(struct net_device *netdev, return -EOPNOTSUPP; fecparam->fec = pplm2ethtool_fec((u_long)fec_configured, - sizeof(u8) * BITS_PER_BYTE); + sizeof(u16) * BITS_PER_BYTE); return 0; } @@ -1537,10 +1543,14 @@ static int mlx5e_set_fecparam(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - u8 fec_policy = 0; + u16 fec_policy = 0; int mode; int err; + if (bitmap_weight((unsigned long *)&fecparam->fec, + ETHTOOL_FEC_LLRS_BIT + 1) > 1) + return -EOPNOTSUPP; + for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) { if (!(pplm_fec_2_ethtool[mode] & fecparam->fec)) continue; @@ -1739,7 +1749,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, return 0; } - return mlx5e_safe_switch_channels(priv, &new_channels, NULL); + return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); } static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) @@ -1772,7 +1782,7 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val return 0; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); if (err) return err; @@ -1829,7 +1839,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) return 0; } - return mlx5e_safe_switch_channels(priv, &new_channels, NULL); + return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); } static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) @@ -1873,7 +1883,7 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) return 0; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); return err; } @@ -1938,7 +1948,8 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } -static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) +int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -1955,12 +1966,15 @@ static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs); } -static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { return mlx5e_ethtool_set_rxnfc(dev, cmd); } const struct ethtool_ops mlx5e_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_get_strings, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4ef3dc79f73c..dd7f338425eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -63,6 +63,7 @@ #include "en/xsk/rx.h" #include "en/xsk/tx.h" #include "en/hv_vhca_stats.h" +#include "en/devlink.h" #include "lib/mlx5.h" @@ -1811,29 +1812,6 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c, - struct mlx5e_params *params) -{ - int num_comp_vectors = mlx5_comp_vectors_count(c->mdev); - int irq; - - if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL)) - return -ENOMEM; - - for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq)); - - cpumask_set_cpu(cpu, c->xps_cpumask); - } - - return 0; -} - -static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) -{ - free_cpumask_var(c->xps_cpumask); -} - static int mlx5e_open_queues(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1984,10 +1962,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->irq_desc = irq_to_desc(irq); c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); - err = mlx5e_alloc_xps_cpumask(c, params); - if (err) - goto err_free_channel; - netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_queues(c, params, cparam); @@ -2010,9 +1984,7 @@ err_close_queues: err_napi_del: netif_napi_del(&c->napi); - mlx5e_free_xps_cpumask(c); -err_free_channel: kvfree(c); return err; @@ -2026,7 +1998,6 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_icosq(&c->icosq); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_activate_xsk(c); @@ -2051,7 +2022,6 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_xsk(c); mlx5e_close_queues(c); netif_napi_del(&c->napi); - mlx5e_free_xps_cpumask(c); kvfree(c); } @@ -2801,6 +2771,8 @@ free_in: return err; } +static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro); + static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u16 mtu) { @@ -2850,6 +2822,8 @@ int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) return 0; } +MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu); + void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv) { struct mlx5e_params *params = &priv->channels.params; @@ -2886,6 +2860,54 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_tc_queue(netdev, tc, nch, 0); } +static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv, u16 count) +{ + int num_txqs = count * priv->channels.params.num_tc; + int num_rxqs = count * priv->profile->rq_groups; + struct net_device *netdev = priv->netdev; + + mlx5e_netdev_set_tcs(netdev); + netif_set_real_num_tx_queues(netdev, num_txqs); + netif_set_real_num_rx_queues(netdev, num_rxqs); +} + +static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, + struct mlx5e_params *params) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int num_comp_vectors, ix, irq; + + num_comp_vectors = mlx5_comp_vectors_count(mdev); + + for (ix = 0; ix < params->num_channels; ix++) { + cpumask_clear(priv->scratchpad.cpumask); + + for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq)); + + cpumask_set_cpu(cpu, priv->scratchpad.cpumask); + } + + netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix); + } +} + +int mlx5e_num_channels_changed(struct mlx5e_priv *priv) +{ + u16 count = priv->channels.params.num_channels; + + mlx5e_update_netdev_queues(priv, count); + mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params); + + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); + + return 0; +} + +MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed); + static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) { int i, ch; @@ -2907,14 +2929,6 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { - int num_txqs = priv->channels.num * priv->channels.params.num_tc; - int num_rxqs = priv->channels.num * priv->profile->rq_groups; - struct net_device *netdev = priv->netdev; - - mlx5e_netdev_set_tcs(netdev); - netif_set_real_num_tx_queues(netdev, num_txqs); - netif_set_real_num_rx_queues(netdev, num_rxqs); - mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); mlx5e_xdp_tx_enable(priv); @@ -2947,42 +2961,52 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) mlx5e_deactivate_channels(&priv->channels); } -static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify) +static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_preactivate preactivate, + void *context) { struct net_device *netdev = priv->netdev; - int new_num_txqs; + struct mlx5e_channels old_chs; int carrier_ok; - - new_num_txqs = new_chs->num * new_chs->params.num_tc; + int err = 0; carrier_ok = netif_carrier_ok(netdev); netif_carrier_off(netdev); - if (new_num_txqs < netdev->real_num_tx_queues) - netif_set_real_num_tx_queues(netdev, new_num_txqs); - mlx5e_deactivate_priv_channels(priv); - mlx5e_close_channels(&priv->channels); + old_chs = priv->channels; priv->channels = *new_chs; - /* New channels are ready to roll, modify HW settings if needed */ - if (hw_modify) - hw_modify(priv); + /* New channels are ready to roll, call the preactivate hook if needed + * to modify HW settings or update kernel parameters. + */ + if (preactivate) { + err = preactivate(priv, context); + if (err) { + priv->channels = old_chs; + goto out; + } + } + mlx5e_close_channels(&old_chs); priv->profile->update_rx(priv); + +out: mlx5e_activate_priv_channels(priv); /* return carrier back if needed */ if (carrier_ok) netif_carrier_on(netdev); + + return err; } int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify) + mlx5e_fp_preactivate preactivate, + void *context) { int err; @@ -2990,8 +3014,16 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, if (err) return err; - mlx5e_switch_priv_channels(priv, new_chs, hw_modify); + err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context); + if (err) + goto err_close; + return 0; + +err_close: + mlx5e_close_channels(new_chs); + + return err; } int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) @@ -2999,7 +3031,7 @@ int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) struct mlx5e_channels new_channels = {}; new_channels.params = priv->channels.params; - return mlx5e_safe_switch_channels(priv, &new_channels, NULL); + return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); } void mlx5e_timestamp_init(struct mlx5e_priv *priv) @@ -3448,7 +3480,8 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, goto out; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, + mlx5e_num_channels_changed_ctx, NULL); if (err) goto out; @@ -3661,7 +3694,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable) goto out; } - err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_modify_tirs_lro); + err = mlx5e_safe_switch_channels(priv, &new_channels, + mlx5e_modify_tirs_lro_ctx, NULL); out: mutex_unlock(&priv->state_lock); return err; @@ -3880,7 +3914,7 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, } int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, - change_hw_mtu_cb set_mtu_cb) + mlx5e_fp_preactivate preactivate) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_channels new_channels = {}; @@ -3929,13 +3963,13 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (!reset) { params->sw_mtu = new_mtu; - if (set_mtu_cb) - set_mtu_cb(priv); + if (preactivate) + preactivate(priv, NULL); netdev->mtu = params->sw_mtu; goto out; } - err = mlx5e_safe_switch_channels(priv, &new_channels, set_mtu_cb); + err = mlx5e_safe_switch_channels(priv, &new_channels, preactivate, NULL); if (err) goto out; @@ -3948,7 +3982,7 @@ out: static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu) { - return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu); + return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); } int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) @@ -4409,7 +4443,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) mlx5e_set_rq_type(priv->mdev, &new_channels.params); old_prog = priv->channels.params.xdp_prog; - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); if (err) goto unlock; } else { @@ -4589,6 +4623,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, #endif + .ndo_get_devlink_port = mlx5e_get_devlink_port, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -4787,9 +4822,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, mlx5e_build_rq_params(mdev, params); /* HW LRO */ - - /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ - if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + if (MLX5_CAP_ETH(mdev, lro_cap) && + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { /* No XSK params: checking the availability of striding RQ in general. */ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) params->lro_en = !slow_pci_heuristic(mdev); @@ -5230,6 +5264,9 @@ int mlx5e_netdev_init(struct net_device *netdev, priv->max_nch = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); priv->max_opened_tc = 1; + if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) + return -ENOMEM; + mutex_init(&priv->state_lock); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); @@ -5238,7 +5275,7 @@ int mlx5e_netdev_init(struct net_device *netdev, priv->wq = create_singlethread_workqueue("mlx5e"); if (!priv->wq) - return -ENOMEM; + goto err_free_cpumask; /* netdev init */ netif_carrier_off(netdev); @@ -5248,11 +5285,17 @@ int mlx5e_netdev_init(struct net_device *netdev, #endif return 0; + +err_free_cpumask: + free_cpumask_var(priv->scratchpad.cpumask); + + return -ENOMEM; } void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv) { destroy_workqueue(priv->wq); + free_cpumask_var(priv->scratchpad.cpumask); } struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, @@ -5287,6 +5330,7 @@ err_free_netdev: int mlx5e_attach_netdev(struct mlx5e_priv *priv) { + const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED; const struct mlx5e_profile *profile; int max_nch; int err; @@ -5298,10 +5342,25 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) max_nch = mlx5e_get_max_num_channels(priv->mdev); if (priv->channels.params.num_channels > max_nch) { mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); + /* Reducing the number of channels - RXFH has to be reset, and + * mlx5e_num_channels_changed below will build the RQT. + */ + priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED; priv->channels.params.num_channels = max_nch; - mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, max_nch); } + /* 1. Set the real number of queues in the kernel the first time. + * 2. Set our default XPS cpumask. + * 3. Build the RQT. + * + * rtnl_lock is required by netif_set_real_num_*_queues in case the + * netdev has been registered by this point (if this function was called + * in the reload or resume flow). + */ + if (take_rtnl) + rtnl_lock(); + mlx5e_num_channels_changed(priv); + if (take_rtnl) + rtnl_unlock(); err = profile->init_tx(priv); if (err) @@ -5425,17 +5484,27 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) goto err_destroy_netdev; } + err = mlx5e_devlink_port_register(priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_detach; + } + err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_detach; + goto err_devlink_port_unregister; } + mlx5e_devlink_port_type_eth_set(priv); + #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_init_app(priv); #endif return priv; +err_devlink_port_unregister: + mlx5e_devlink_port_unregister(priv); err_detach: mlx5e_detach(mdev, priv); err_destroy_netdev: @@ -5457,6 +5526,7 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); #endif + mlx5e_devlink_port_unregister(priv); unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 6ed307d7f191..2a0243e4af75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -41,7 +41,7 @@ #include <net/ipv6_stubs.h> #include "eswitch.h" -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" #include "en.h" #include "en_rep.h" #include "en_tc.h" @@ -192,7 +192,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats); if (err) { - pr_warn("vport %d error %d reading stats\n", rep->vport, err); + netdev_warn(priv->netdev, "vport %d error %d reading stats\n", + rep->vport, err); return; } @@ -252,25 +253,6 @@ static int mlx5e_rep_set_ringparam(struct net_device *dev, return mlx5e_ethtool_set_ringparam(priv, param); } -static int mlx5e_replace_rep_vport_rx_rule(struct mlx5e_priv *priv, - struct mlx5_flow_destination *dest) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - struct mlx5_flow_handle *flow_rule; - - flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, - rep->vport, - dest); - if (IS_ERR(flow_rule)) - return PTR_ERR(flow_rule); - - mlx5_del_flow_rules(rpriv->vport_rx_rule); - rpriv->vport_rx_rule = flow_rule; - return 0; -} - static void mlx5e_rep_get_channels(struct net_device *dev, struct ethtool_channels *ch) { @@ -283,33 +265,8 @@ static int mlx5e_rep_set_channels(struct net_device *dev, struct ethtool_channels *ch) { struct mlx5e_priv *priv = netdev_priv(dev); - u16 curr_channels_amount = priv->channels.params.num_channels; - u32 new_channels_amount = ch->combined_count; - struct mlx5_flow_destination new_dest; - int err = 0; - - err = mlx5e_ethtool_set_channels(priv, ch); - if (err) - return err; - if (curr_channels_amount == 1 && new_channels_amount > 1) { - new_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - new_dest.ft = priv->fs.ttc.ft.t; - } else if (new_channels_amount == 1 && curr_channels_amount > 1) { - new_dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - new_dest.tir_num = priv->direct_tir[0].tirn; - } else { - return 0; - } - - err = mlx5e_replace_rep_vport_rx_rule(priv, &new_dest); - if (err) { - netdev_warn(priv->netdev, "Failed to update vport rx rule, when going from (%d) channels to (%d) channels\n", - curr_channels_amount, new_channels_amount); - return err; - } - - return 0; + return mlx5e_ethtool_set_channels(priv, ch); } static int mlx5e_rep_get_coalesce(struct net_device *netdev, @@ -375,6 +332,9 @@ static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, } static const struct ethtool_ops mlx5e_rep_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -391,6 +351,9 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { }; static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = mlx5e_uplink_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -406,6 +369,10 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings, .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, + .get_rxfh = mlx5e_get_rxfh, + .set_rxfh = mlx5e_set_rxfh, + .get_rxnfc = mlx5e_get_rxnfc, + .set_rxnfc = mlx5e_set_rxnfc, .get_pauseparam = mlx5e_uplink_rep_get_pauseparam, .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, }; @@ -727,9 +694,9 @@ static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) static int mlx5e_rep_indr_offload(struct net_device *netdev, struct flow_cls_offload *flower, - struct mlx5e_rep_indr_block_priv *indr_priv) + struct mlx5e_rep_indr_block_priv *indr_priv, + unsigned long flags) { - unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); int err = 0; @@ -750,20 +717,68 @@ mlx5e_rep_indr_offload(struct net_device *netdev, return err; } -static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, - void *type_data, void *indr_priv) +static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) { + unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_rep_indr_block_priv *priv = indr_priv; switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_rep_indr_offload(priv->netdev, type_data, priv); + return mlx5e_rep_indr_offload(priv->netdev, type_data, priv, + flags); default: return -EOPNOTSUPP; } } -static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv) +static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + struct flow_cls_offload *f = type_data; + struct flow_cls_offload tmp; + struct mlx5e_priv *mpriv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + + mpriv = netdev_priv(priv->rpriv->netdev); + esw = mpriv->mdev->priv.eswitch; + + flags = MLX5_TC_FLAG(EGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + + switch (type) { + case TC_SETUP_CLSFLOWER: + memcpy(&tmp, f, sizeof(*f)); + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. + */ + if (!mlx5_esw_chains_prios_supported(esw) || + tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) || + tmp.common.chain_index) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.prio++; + err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} + +static void mlx5e_rep_indr_block_unbind(void *cb_priv) { struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; @@ -774,9 +789,10 @@ static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv) static LIST_HEAD(mlx5e_block_cb_list); static int -mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, - struct mlx5e_rep_priv *rpriv, - struct flow_block_offload *f) +mlx5e_rep_indr_setup_block(struct net_device *netdev, + struct mlx5e_rep_priv *rpriv, + struct flow_block_offload *f, + flow_setup_cb_t *setup_cb) { struct mlx5e_rep_indr_block_priv *indr_priv; struct flow_block_cb *block_cb; @@ -802,9 +818,8 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, list_add(&indr_priv->list, &rpriv->uplink_priv.tc_indr_block_priv_list); - block_cb = flow_block_cb_alloc(mlx5e_rep_indr_setup_block_cb, - indr_priv, indr_priv, - mlx5e_rep_indr_tc_block_unbind); + block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv, + mlx5e_rep_indr_block_unbind); if (IS_ERR(block_cb)) { list_del(&indr_priv->list); kfree(indr_priv); @@ -819,9 +834,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, if (!indr_priv) return -ENOENT; - block_cb = flow_block_cb_lookup(f->block, - mlx5e_rep_indr_setup_block_cb, - indr_priv); + block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv); if (!block_cb) return -ENOENT; @@ -835,13 +848,16 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, } static -int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data) +int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv, + enum tc_setup_type type, void *type_data) { switch (type) { case TC_SETUP_BLOCK: - return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv, - type_data); + return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, + mlx5e_rep_indr_setup_tc_cb); + case TC_SETUP_FT: + return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, + mlx5e_rep_indr_setup_ft_cb); default: return -EOPNOTSUPP; } @@ -853,7 +869,7 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, int err; err = __flow_indr_block_cb_register(netdev, rpriv, - mlx5e_rep_indr_setup_tc_cb, + mlx5e_rep_indr_setup_cb, rpriv); if (err) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); @@ -867,7 +883,7 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, struct net_device *netdev) { - __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, + __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb, rpriv); } @@ -1279,8 +1295,7 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, case TC_SETUP_CLSFLOWER: memcpy(&tmp, f, sizeof(*f)); - if (!mlx5_esw_chains_prios_supported(esw) || - tmp.common.chain_index) + if (!mlx5_esw_chains_prios_supported(esw)) return -EOPNOTSUPP; /* Re-use tc offload path by moving the ft flow to the @@ -1396,7 +1411,7 @@ static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu) { - return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu); + return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); } static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr) @@ -1422,7 +1437,7 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan return 0; } -static struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) +static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1435,7 +1450,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_devlink_port = mlx5e_get_devlink_port, + .ndo_get_devlink_port = mlx5e_rep_get_devlink_port, .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, @@ -1448,7 +1463,7 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_start_xmit = mlx5e_xmit, .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_devlink_port = mlx5e_get_devlink_port, + .ndo_get_devlink_port = mlx5e_rep_get_devlink_port, .ndo_get_stats64 = mlx5e_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, @@ -1464,6 +1479,11 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_set_features = mlx5e_set_features, }; +bool mlx5e_eswitch_uplink_rep(struct net_device *netdev) +{ + return netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep; +} + bool mlx5e_eswitch_rep(struct net_device *netdev) { if (netdev->netdev_ops == &mlx5e_netdev_ops_rep || @@ -1584,6 +1604,8 @@ static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct ttc_params ttc_params = {}; int tt, err; @@ -1593,6 +1615,11 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) /* The inner_ttc in the ttc params is intentionally not set */ ttc_params.any_tt_tirn = priv->direct_tir[0].tirn; mlx5e_set_ttc_ft_params(&ttc_params); + + if (rep->vport != MLX5_VPORT_UPLINK) + /* To give uplik rep TTC a lower level for chaining from root ft */ + ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1; + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; @@ -1604,6 +1631,52 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) return 0; } +static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + int err = 0; + + if (rep->vport != MLX5_VPORT_UPLINK) { + /* non uplik reps will skip any bypass tables and go directly to + * their own ttc + */ + rpriv->root_ft = priv->fs.ttc.ft.t; + return 0; + } + + /* uplink root ft will be used to auto chain, to ethtool or ttc tables */ + ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + netdev_err(priv->netdev, "Failed to get reps offloads namespace\n"); + return -EOPNOTSUPP; + } + + ft_attr.max_fte = 0; /* Empty table, miss rule will always point to next table */ + ft_attr.prio = 1; + ft_attr.level = 1; + + rpriv->root_ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(rpriv->root_ft)) { + err = PTR_ERR(rpriv->root_ft); + rpriv->root_ft = NULL; + } + + return err; +} + +static void mlx5e_destroy_rep_root_ft(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + if (rep->vport != MLX5_VPORT_UPLINK) + return; + mlx5_destroy_flow_table(rpriv->root_ft); +} + static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -1612,11 +1685,10 @@ static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv) struct mlx5_flow_handle *flow_rule; struct mlx5_flow_destination dest; - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest.tir_num = priv->direct_tir[0].tirn; - flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, - rep->vport, - &dest); + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rpriv->root_ft; + + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport, &dest); if (IS_ERR(flow_rule)) return PTR_ERR(flow_rule); rpriv->vport_rx_rule = flow_rule; @@ -1656,12 +1728,20 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_tirs; - err = mlx5e_create_rep_vport_rx_rule(priv); + err = mlx5e_create_rep_root_ft(priv); if (err) goto err_destroy_ttc_table; + err = mlx5e_create_rep_vport_rx_rule(priv); + if (err) + goto err_destroy_root_ft; + + mlx5e_ethtool_init_steering(priv); + return 0; +err_destroy_root_ft: + mlx5e_destroy_rep_root_ft(priv); err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: @@ -1682,6 +1762,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; mlx5_del_flow_rules(rpriv->vport_rx_rule); + mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, false); @@ -1920,7 +2001,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_update_ndo_stats, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep, .max_tc = 1, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, @@ -1940,7 +2021,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep, .max_tc = MLX5E_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_ul_rep_stats_grps, @@ -2026,8 +2107,9 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { - pr_warn("Failed to create representor netdev for vport %d\n", - rep->vport); + mlx5_core_warn(dev, + "Failed to create representor netdev for vport %d\n", + rep->vport); kfree(rpriv); return -EINVAL; } @@ -2045,29 +2127,32 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) err = mlx5e_attach_netdev(netdev_priv(netdev)); if (err) { - pr_warn("Failed to attach representor netdev for vport %d\n", - rep->vport); + netdev_warn(netdev, + "Failed to attach representor netdev for vport %d\n", + rep->vport); goto err_destroy_mdev_resources; } err = mlx5e_rep_neigh_init(rpriv); if (err) { - pr_warn("Failed to initialized neighbours handling for vport %d\n", - rep->vport); + netdev_warn(netdev, + "Failed to initialized neighbours handling for vport %d\n", + rep->vport); goto err_detach_netdev; } err = register_devlink_port(dev, rpriv); if (err) { - esw_warn(dev, "Failed to register devlink port %d\n", - rep->vport); + netdev_warn(netdev, "Failed to register devlink port %d\n", + rep->vport); goto err_neigh_cleanup; } err = register_netdev(netdev); if (err) { - pr_warn("Failed to register representor netdev for vport %d\n", - rep->vport); + netdev_warn(netdev, + "Failed to register representor netdev for vport %d\n", + rep->vport); goto err_devlink_cleanup; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 3f756d51435f..6a2337900420 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -55,6 +55,7 @@ struct mlx5e_neigh_update_table { unsigned long min_interval; /* jiffies */ }; +struct mlx5_tc_ct_priv; struct mlx5_rep_uplink_priv { /* Filters DB - instantiated by the uplink representor and shared by * the uplink's VFs @@ -81,12 +82,20 @@ struct mlx5_rep_uplink_priv { struct mutex unready_flows_lock; struct list_head unready_flows; struct work_struct reoffload_flows_work; + + /* maps tun_info to a unique id*/ + struct mapping_ctx *tunnel_mapping; + /* maps tun_enc_opts to a unique id*/ + struct mapping_ctx *tunnel_enc_opts_mapping; + + struct mlx5_tc_ct_priv *ct_priv; }; struct mlx5e_rep_priv { struct mlx5_eswitch_rep *rep; struct mlx5e_neigh_update_table neigh_update; struct net_device *netdev; + struct mlx5_flow_table *root_ft; struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ @@ -191,6 +200,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe); int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); @@ -200,6 +211,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); bool mlx5e_eswitch_rep(struct net_device *netdev); +bool mlx5e_eswitch_uplink_rep(struct net_device *netdev); #else /* CONFIG_MLX5_ESWITCH */ static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 312d4692425b..6173faf542b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -158,7 +158,8 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, mlx5e_read_mini_arr_slot(wq, cqd, cqcc); mlx5e_decompress_cqe_no_hash(rq, wq, cqcc); - rq->handle_rx_cqe(rq, &cqd->title); + INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, rq, &cqd->title); } mlx5e_cqes_update_owner(wq, cqcc - wq->cc); wq->cc = cqcc; @@ -178,7 +179,8 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, mlx5e_read_title_slot(rq, wq, cc); mlx5e_read_mini_arr_slot(wq, cqd, cc + 1); mlx5e_decompress_cqe(rq, wq, cc); - rq->handle_rx_cqe(rq, &cqd->title); + INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, rq, &cqd->title); cqd->mini_arr_idx++; return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1; @@ -1192,6 +1194,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5e_tc_update_priv tc_priv = {}; struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; struct sk_buff *skb; @@ -1224,13 +1227,78 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (rep->vlan && skb_vlan_tag_present(skb)) skb_vlan_pop(skb); + if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv)) + goto free_wqe; + napi_gro_receive(rq->cq.napi, skb); + mlx5_tc_rep_post_napi_receive(&tc_priv); + free_wqe: mlx5e_free_rx_wqe(rq, wi, true); wq_cyc_pop: mlx5_wq_cyc_pop(wq); } + +void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; + u32 head_offset = wqe_offset & (PAGE_SIZE - 1); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + struct mlx5e_tc_update_priv tc_priv = {}; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5_wq_ll *wq; + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + trigger_report(rq, cqe); + rq->stats->wqe_err++; + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + struct mlx5e_rq_stats *stats = rq->stats; + + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + rq, wi, cqe_bcnt, head_offset, page_idx); + if (!skb) + goto mpwrq_cqe_out; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv)) + goto mpwrq_cqe_out; + + napi_gro_receive(rq->cq.napi, skb); + + mlx5_tc_rep_post_napi_receive(&tc_priv); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5e_free_rx_mpwqe(rq, wi, true); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} #endif struct sk_buff * diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ec5fc52bf572..438128dde187 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -51,14 +51,18 @@ #include "en_rep.h" #include "en_tc.h" #include "eswitch.h" -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" +#include "en/mapping.h" +#include "en/tc_ct.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "diag/en_tc_tracepoint.h" +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) + struct mlx5_nic_flow_attr { u32 action; u32 flow_tag; @@ -84,6 +88,7 @@ enum { MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, + MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, }; #define MLX5E_TC_MAX_SPLITS 1 @@ -134,6 +139,8 @@ struct mlx5e_tc_flow { refcount_t refcnt; struct rcu_head rcu_head; struct completion init_done; + int tunnel_id; /* the mapped tunnel id of this flow */ + union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -144,15 +151,118 @@ struct mlx5e_tc_flow_parse_attr { const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; - int num_mod_hdr_actions; - int max_mod_hdr_actions; - void *mod_hdr_actions; + struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) +struct tunnel_match_key { + struct flow_dissector_key_control enc_control; + struct flow_dissector_key_keyid enc_key_id; + struct flow_dissector_key_ports enc_tp; + struct flow_dissector_key_ip enc_ip; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; + + int filter_ifindex; +}; + +/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS. + * Upper TUNNEL_INFO_BITS for general tunnel info. + * Lower ENC_OPTS_BITS bits for enc_opts. + */ +#define TUNNEL_INFO_BITS 6 +#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) +#define ENC_OPTS_BITS 2 +#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) +#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) +#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) + +struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { + [CHAIN_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, + .moffset = 0, + .mlen = 2, + }, + [TUNNEL_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, + .moffset = 3, + .mlen = 1, + .soffset = MLX5_BYTE_OFF(fte_match_param, + misc_parameters_2.metadata_reg_c_1), + }, + [ZONE_TO_REG] = zone_to_reg_ct, + [CTSTATE_TO_REG] = ctstate_to_reg_ct, + [MARK_TO_REG] = mark_to_reg_ct, + [LABELS_TO_REG] = labels_to_reg_ct, + [FTEID_TO_REG] = fteid_to_reg_ct, + [TUPLEID_TO_REG] = tupleid_to_reg_ct, +}; + +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); + +void +mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask) +{ + int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; + void *headers_c = spec->match_criteria; + void *headers_v = spec->match_value; + void *fmask, *fval; + + fmask = headers_c + soffset; + fval = headers_v + soffset; + + mask = cpu_to_be32(mask) >> (32 - (match_len * 8)); + data = cpu_to_be32(data) >> (32 - (match_len * 8)); + + memcpy(fmask, &mask, match_len); + memcpy(fval, &data, match_len); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; +} + +int +mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + u32 data) +{ + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; + int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; + char *modact; + int err; + + err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB, + mod_hdr_acts); + if (err) + return err; + + modact = mod_hdr_acts->actions + + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); + + /* Firmware has 5bit length field and 0 means 32bits */ + if (mlen == 4) + mlen = 0; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, mfield); + MLX5_SET(set_action_in, modact, offset, moffset * 8); + MLX5_SET(set_action_in, modact, length, mlen * 8); + MLX5_SET(set_action_in, modact, data, data); + mod_hdr_acts->num_actions++; + + return 0; +} + struct mlx5e_hairpin { struct mlx5_hairpin *pair; @@ -210,8 +320,6 @@ struct mlx5e_mod_hdr_entry { int compl_result; }; -#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) - static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); @@ -361,10 +469,10 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mod_hdr_key key; u32 hash_key; - num_actions = parse_attr->num_mod_hdr_actions; + num_actions = parse_attr->mod_hdr_acts.num_actions; actions_size = MLX5_MH_ACT_SZ * num_actions; - key.actions = parse_attr->mod_hdr_actions; + key.actions = parse_attr->mod_hdr_acts.actions; key.num_actions = num_actions; hash_key = hash_mod_hdr_info(&key); @@ -954,7 +1062,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); flow_act.modify_hdr = attr->modify_hdr; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1043,8 +1151,16 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; + if (flow_flag_test(flow, CT)) { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + + return mlx5_tc_ct_flow_offload(flow->priv, flow, spec, attr, + mod_hdr_acts); + } + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); if (IS_ERR(rule)) return rule; @@ -1063,10 +1179,15 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, static void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr) + struct mlx5_esw_flow_attr *attr) { flow_flag_clear(flow, OFFLOADED); + if (flow_flag_test(flow, CT)) { + mlx5_tc_ct_delete_flow(flow->priv, flow, attr); + return; + } + if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); @@ -1076,17 +1197,17 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, static struct mlx5_flow_handle * mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, - struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *slow_attr) + struct mlx5_flow_spec *spec) { + struct mlx5_esw_flow_attr slow_attr; struct mlx5_flow_handle *rule; - memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); - slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr->split_count = 0; - slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr)); + slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr.split_count = 0; + slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, &slow_attr); if (!IS_ERR(rule)) flow_flag_set(flow, SLOW); @@ -1095,14 +1216,15 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, static void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, - struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *slow_attr) + struct mlx5e_tc_flow *flow) { - memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); - slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr->split_count = 0; - slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; - mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); + struct mlx5_esw_flow_attr slow_attr; + + memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr)); + slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr.split_count = 0; + slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + mlx5e_tc_unoffload_fdb_rules(esw, flow, &slow_attr); flow_flag_clear(flow, SLOW); } @@ -1173,7 +1295,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, int out_index; if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) { - NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW"); + NL_SET_ERR_MSG_MOD(extack, + "E-switch priorities unsupported, upgrade FW"); return -EOPNOTSUPP; } @@ -1184,13 +1307,15 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, */ max_chain = mlx5_esw_chains_get_chain_range(esw); if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { - NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); + NL_SET_ERR_MSG_MOD(extack, + "Requested chain is out of supported range"); return -EOPNOTSUPP; } max_prio = mlx5_esw_chains_get_prio_range(esw); if (attr->prio > max_prio) { - NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); + NL_SET_ERR_MSG_MOD(extack, + "Requested priority is out of supported range"); return -EOPNOTSUPP; } @@ -1220,7 +1345,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1237,14 +1362,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, * (1) there's no error * (2) there's an encap action and we don't have valid neigh */ - if (!encap_valid) { - /* continue with goto slow path rule instead */ - struct mlx5_esw_flow_attr slow_attr; - - flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr); - } else { + if (!encap_valid) + flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); + else flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); - } if (IS_ERR(flow->rule[0])) return PTR_ERR(flow->rule[0]); @@ -1272,9 +1393,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct mlx5_esw_flow_attr slow_attr; int out_index; + mlx5e_put_flow_tunnel_id(flow); + if (flow_flag_test(flow, NOT_READY)) { remove_unready_flow(flow); kvfree(attr->parse_attr); @@ -1283,7 +1405,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (mlx5e_is_offloaded_flow(flow)) { if (flow_flag_test(flow, SLOW)) - mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + mlx5e_tc_unoffload_from_slow_path(esw, flow); else mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } @@ -1312,7 +1434,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr slow_attr, *esw_attr; + struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; @@ -1365,7 +1487,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, continue; } - mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + mlx5e_tc_unoffload_from_slow_path(esw, flow); flow->rule[0] = rule; /* was unset when slow path rule removed */ flow_flag_set(flow, OFFLOADED); @@ -1377,7 +1499,6 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr slow_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; @@ -1389,7 +1510,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, spec = &flow->esw_attr->parse_attr->spec; /* update from encap rule to slow path rule */ - rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr); + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); /* mark the flow's encap dest as non-valid */ flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; @@ -1664,150 +1785,272 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, } } +static int flow_has_tc_fwd_action(struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_action *flow_action = &rule->action; + const struct flow_action_entry *act; + int i; -static int parse_tunnel_attr(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct flow_cls_offload *f, - struct net_device *filter_dev, u8 *match_level) + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_GOTO: + return true; + default: + continue; + } + } + + return false; +} + +static int +enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, + struct flow_dissector_key_enc_opts *opts, + struct netlink_ext_ack *extack, + bool *dont_care) +{ + struct geneve_opt *opt; + int off = 0; + + *dont_care = true; + + while (opts->len > off) { + opt = (struct geneve_opt *)&opts->data[off]; + + if (!(*dont_care) || opt->opt_class || opt->type || + memchr_inv(opt->opt_data, 0, opt->length * 4)) { + *dont_care = false; + + if (opt->opt_class != U16_MAX || + opt->type != U8_MAX || + memchr_inv(opt->opt_data, 0xFF, + opt->length * 4)) { + NL_SET_ERR_MSG(extack, + "Partial match of tunnel options in chain > 0 isn't supported"); + netdev_warn(priv->netdev, + "Partial match of tunnel options in chain > 0 isn't supported"); + return -EOPNOTSUPP; + } + } + + off += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +#define COPY_DISSECTOR(rule, diss_key, dst)\ +({ \ + struct flow_rule *__rule = (rule);\ + typeof(dst) __dst = dst;\ +\ + memcpy(__dst,\ + skb_flow_dissector_target(__rule->match.dissector,\ + diss_key,\ + __rule->match.key),\ + sizeof(*__dst));\ +}) + +static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct flow_cls_offload *f, + struct net_device *filter_dev) { - struct netlink_ext_ack *extack = f->common.extack; - void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; + struct flow_match_enc_opts enc_opts_match; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tunnel_match_key tunnel_key; + bool enc_opts_is_dont_care = true; + u32 tun_id, enc_opts_id = 0; + struct mlx5_eswitch *esw; + u32 value, mask; int err; - err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, - headers_c, headers_v, match_level); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "failed to parse tunnel attributes"); + esw = priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + memset(&tunnel_key, 0, sizeof(tunnel_key)); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, + &tunnel_key.enc_control); + if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + &tunnel_key.enc_ipv4); + else + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + &tunnel_key.enc_ipv6); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, + &tunnel_key.enc_tp); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, + &tunnel_key.enc_key_id); + tunnel_key.filter_ifindex = filter_dev->ifindex; + + err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); + if (err) return err; - } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_match_control match; - u16 addr_type; + flow_rule_match_enc_opts(rule, &enc_opts_match); + err = enc_opts_is_dont_care_or_full_match(priv, + enc_opts_match.mask, + extack, + &enc_opts_is_dont_care); + if (err) + goto err_enc_opts; - flow_rule_match_enc_control(rule, &match); - addr_type = match.key->addr_type; + if (!enc_opts_is_dont_care) { + err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_match.key, &enc_opts_id); + if (err) + goto err_enc_opts; + } - /* For tunnel addr_type used same key id`s as for non-tunnel */ - if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_match_ipv4_addrs match; + value = tun_id << ENC_OPTS_BITS | enc_opts_id; + mask = enc_opts_id ? TUNNEL_ID_MASK : + (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); - flow_rule_match_enc_ipv4_addrs(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.key->src)); + if (attr->chain) { + mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, + TUNNEL_TO_REG, value, mask); + } else { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + err = mlx5e_tc_match_to_reg_set(priv->mdev, + mod_hdr_acts, + TUNNEL_TO_REG, value); + if (err) + goto err_set; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.key->dst)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IP); - } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_match_ipv6_addrs match; - - flow_rule_match_enc_ipv6_addrs(rule, &match); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IPV6); - } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_match_ip match; + flow->tunnel_id = value; + return 0; - flow_rule_match_enc_ip(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, - match.mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, - match.key->tos & 0x3); +err_set: + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +err_enc_opts: + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + return err; +} - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, - match.mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, - match.key->tos >> 2); +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) +{ + u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK; + u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_eswitch *esw; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, - match.mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, - match.key->ttl); + esw = flow->priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (tun_id) + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +} - if (match.mask->ttl && - !MLX5_CAP_ESW_FLOWTABLE_FDB - (priv->mdev, - ft_field_support.outer_ipv4_ttl)) { +u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow) +{ + return flow->tunnel_id; +} + +static int parse_tunnel_attr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct net_device *filter_dev, + u8 *match_level, + bool *match_inner) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct netlink_ext_ack *extack = f->common.extack; + bool needs_mapping, sets_mapping; + int err; + + if (!mlx5e_is_eswitch_flow(flow)) + return -EOPNOTSUPP; + + needs_mapping = !!flow->esw_attr->chain; + sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f); + *match_inner = !needs_mapping; + + if ((needs_mapping || sets_mapping) && + !mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + NL_SET_ERR_MSG(extack, + "Chains on tunnel devices isn't supported without register loopback support"); + netdev_warn(priv->netdev, + "Chains on tunnel devices isn't supported without register loopback support"); + return -EOPNOTSUPP; + } + + if (!flow->esw_attr->chain) { + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + match_level); + if (err) { NL_SET_ERR_MSG_MOD(extack, - "Matching on TTL is not supported"); - return -EOPNOTSUPP; + "Failed to parse tunnel attributes"); + netdev_warn(priv->netdev, + "Failed to parse tunnel attributes"); + return err; } + flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; } - /* Enforce DMAC when offloading incoming tunneled flows. - * Flow counters require a match on the DMAC. - */ - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), priv->netdev->dev_addr); + if (!needs_mapping && !sets_mapping) + return 0; - /* let software handle IP fragments */ - MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev); +} - return 0; +static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers); } -static void *get_match_headers_criteria(u32 flags, - struct mlx5_flow_spec *spec) +static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec) { - return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? - MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - inner_headers) : - MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers); +} + +static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); +} + +static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); } static void *get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) { return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? - MLX5_ADDR_OF(fte_match_param, spec->match_value, - inner_headers) : - MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); + get_match_inner_headers_value(spec) : + get_match_outer_headers_value(spec); +} + +static void *get_match_headers_criteria(u32 flags, + struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? + get_match_inner_headers_criteria(spec) : + get_match_outer_headers_criteria(spec); } static int mlx5e_flower_parse_meta(struct net_device *filter_dev, @@ -1845,6 +2088,7 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, } static int __parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct net_device *filter_dev, @@ -1885,6 +2129,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_IP) | + BIT(FLOW_DISSECTOR_KEY_CT) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); @@ -1894,18 +2139,22 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } if (mlx5e_get_tc_tun(filter_dev)) { - if (parse_tunnel_attr(priv, spec, f, filter_dev, - outer_match_level)) - return -EOPNOTSUPP; + bool match_inner = false; - /* At this point, header pointers should point to the inner - * headers, outer header were already set by parse_tunnel_attr - */ - match_level = inner_match_level; - headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, - spec); - headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, - spec); + err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, + outer_match_level, &match_inner); + if (err) + return err; + + if (match_inner) { + /* header pointers should point to the inner headers + * if the packet was decapsulated already. + * outer headers are set by parse_tunnel_attr. + */ + match_level = inner_match_level; + headers_c = get_match_inner_headers_criteria(spec); + headers_v = get_match_inner_headers_value(spec); + } } err = mlx5e_flower_parse_meta(filter_dev, f); @@ -2222,8 +2471,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, inner_match_level = MLX5_MATCH_NONE; outer_match_level = MLX5_MATCH_NONE; - err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level, - &outer_match_level); + err = __parse_cls_flower(priv, flow, spec, f, filter_dev, + &inner_match_level, &outer_match_level); non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? outer_match_level : inner_match_level; @@ -2383,25 +2632,26 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; -/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, attr->num_mod_hdr_actions - * says how many HW actions were actually parsed. - */ -static int offload_pedit_fields(struct pedit_headers_action *hdrs, +static int offload_pedit_fields(struct mlx5e_priv *priv, + int namespace, + struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, u32 *action_flags, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, nactions, max_actions, first, last, next_z; + int i, action_size, first, last, next_z; void *headers_c, *headers_v, *action, *vals_p; u32 *s_masks_p, *a_masks_p, s_mask, a_mask; + struct mlx5e_tc_mod_hdr_acts *mod_acts; struct mlx5_fields *f; unsigned long mask; __be32 mask_be32; __be16 mask_be16; + int err; u8 cmd; + mod_acts = &parse_attr->mod_hdr_acts; headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec); headers_v = get_match_headers_value(*action_flags, &parse_attr->spec); @@ -2411,11 +2661,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, add_vals = &hdrs[1].vals; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions + - parse_attr->num_mod_hdr_actions * action_size; - - max_actions = parse_attr->max_mod_hdr_actions; - nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { bool skip; @@ -2441,13 +2686,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } - if (nactions == max_actions) { - NL_SET_ERR_MSG_MOD(extack, - "too many pedit actions, can't offload"); - printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions); - return -EOPNOTSUPP; - } - skip = false; if (s_mask) { void *match_mask = headers_c + f->match_offset; @@ -2495,6 +2733,18 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } + err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "too many pedit actions, can't offload"); + mlx5_core_warn(priv->mdev, + "mlx5: parsed %d pedit actions, can't do more\n", + mod_acts->num_actions); + return err; + } + + action = mod_acts->actions + + (mod_acts->num_actions * action_size); MLX5_SET(set_action_in, action, action_type, cmd); MLX5_SET(set_action_in, action, field, f->field); @@ -2517,11 +2767,9 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, else if (f->field_bsize == 8) MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); - action += action_size; - nactions++; + ++mod_acts->num_actions; } - parse_attr->num_mod_hdr_actions = nactions; return 0; } @@ -2534,34 +2782,52 @@ static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev, return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); } -static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, - struct pedit_headers_action *hdrs, - int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr) +int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, + int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { - int nkeys, action_size, max_actions; + int action_size, new_num_actions, max_hw_actions; + size_t new_sz, old_sz; + void *ret; - nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits + - hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits; - action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) + return 0; - max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace); - /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */ - max_actions = min(max_actions, nkeys * 16); + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL); - if (!parse_attr->mod_hdr_actions) + max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev, + namespace); + new_num_actions = min(max_hw_actions, + mod_hdr_acts->actions ? + mod_hdr_acts->max_actions * 2 : 1); + if (mod_hdr_acts->max_actions == new_num_actions) + return -ENOSPC; + + new_sz = action_size * new_num_actions; + old_sz = mod_hdr_acts->max_actions * action_size; + ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); + if (!ret) return -ENOMEM; - parse_attr->max_mod_hdr_actions = max_actions; + memset(ret + old_sz, 0, new_sz - old_sz); + mod_hdr_acts->actions = ret; + mod_hdr_acts->max_actions = new_num_actions; + return 0; } +void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + kfree(mod_hdr_acts->actions); + mod_hdr_acts->actions = NULL; + mod_hdr_acts->num_actions = 0; + mod_hdr_acts->max_actions = 0; +} + static const struct pedit_headers zero_masks = {}; static int parse_tc_pedit_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { @@ -2609,13 +2875,8 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, int err; u8 cmd; - if (!parse_attr->mod_hdr_actions) { - err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); - if (err) - goto out_err; - } - - err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack); + err = offload_pedit_fields(priv, namespace, hdrs, parse_attr, + action_flags, extack); if (err < 0) goto out_dealloc_parsed_actions; @@ -2635,8 +2896,7 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, return 0; out_dealloc_parsed_actions: - kfree(parse_attr->mod_hdr_actions); -out_err: + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); return err; } @@ -2681,7 +2941,9 @@ struct ipv6_hoplimit_word { __u8 hop_limit; }; -static bool is_action_keys_supported(const struct flow_action_entry *act) +static int is_action_keys_supported(const struct flow_action_entry *act, + bool ct_flow, bool *modify_ip_header, + struct netlink_ext_ack *extack) { u32 mask, offset; u8 htype; @@ -2700,7 +2962,13 @@ static bool is_action_keys_supported(const struct flow_action_entry *act) if (offset != offsetof(struct iphdr, ttl) || ttl_word->protocol || ttl_word->check) { - return true; + *modify_ip_header = true; + } + + if (ct_flow && offset >= offsetof(struct iphdr, saddr)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of ipv4 address with action ct"); + return -EOPNOTSUPP; } } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { struct ipv6_hoplimit_word *hoplimit_word = @@ -2709,15 +2977,27 @@ static bool is_action_keys_supported(const struct flow_action_entry *act) if (offset != offsetof(struct ipv6hdr, payload_len) || hoplimit_word->payload_len || hoplimit_word->nexthdr) { - return true; + *modify_ip_header = true; } + + if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of ipv6 address with action ct"); + return -EOPNOTSUPP; + } + } else if (ct_flow && (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP || + htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of transport header ports with action ct"); + return -EOPNOTSUPP; } - return false; + + return 0; } static bool modify_header_match_supported(struct mlx5_flow_spec *spec, struct flow_action *flow_action, - u32 actions, + u32 actions, bool ct_flow, struct netlink_ext_ack *extack) { const struct flow_action_entry *act; @@ -2725,7 +3005,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, void *headers_v; u16 ethertype; u8 ip_proto; - int i; + int i, err; headers_v = get_match_headers_value(actions, spec); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); @@ -2740,10 +3020,10 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, act->id != FLOW_ACTION_ADD) continue; - if (is_action_keys_supported(act)) { - modify_ip_header = true; - break; - } + err = is_action_keys_supported(act, ct_flow, + &modify_ip_header, extack); + if (err) + return err; } ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); @@ -2765,23 +3045,29 @@ static bool actions_match_supported(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + bool ct_flow; u32 actions; - if (mlx5e_is_eswitch_flow(flow)) + ct_flow = flow_flag_test(flow, CT); + if (mlx5e_is_eswitch_flow(flow)) { actions = flow->esw_attr->action; - else - actions = flow->nic_attr->action; - if (flow_flag_test(flow, EGRESS) && - !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) - return false; + if (flow->esw_attr->split_count && ct_flow) { + /* All registers used by ct are cleared when using + * split rules. + */ + NL_SET_ERR_MSG_MOD(extack, + "Can't offload mirroring with action ct"); + return false; + } + } else { + actions = flow->nic_attr->action; + } if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) return modify_header_match_supported(&parse_attr->spec, flow_action, actions, - extack); + ct_flow, extack); return true; } @@ -2837,8 +3123,7 @@ static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, return -EOPNOTSUPP; } - err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, - hdrs, NULL); + err = parse_tc_pedit_action(priv, &pedit_act, namespace, hdrs, NULL); *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; return err; @@ -2883,6 +3168,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (!flow_action_has_entries(flow_action)) return -EINVAL; + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) + return -EOPNOTSUPP; + attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; flow_action_for_each(i, act, flow_action) { @@ -2900,7 +3189,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr, hdrs, extack); + hdrs, extack); if (err) return err; @@ -2969,9 +3258,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, /* in case all pedit actions are skipped, remove the MOD_HDR * flag. */ - if (parse_attr->num_mod_hdr_actions == 0) { + if (parse_attr->mod_hdr_acts.num_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); } } @@ -3314,6 +3603,85 @@ static bool is_duplicated_output_device(struct net_device *dev, return false; } +static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + const struct flow_action_entry *act, + u32 actions, + struct netlink_ext_ack *extack) +{ + u32 max_chain = mlx5_esw_chains_get_chain_range(esw); + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + bool ft_flow = mlx5e_is_ft_flow(flow); + u32 dest_chain = act->chain_index; + + if (ft_flow) { + NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); + return -EOPNOTSUPP; + } + + if (!mlx5_esw_chains_backwards_supported(esw) && + dest_chain <= attr->chain) { + NL_SET_ERR_MSG_MOD(extack, + "Goto lower numbered chain isn't supported"); + return -EOPNOTSUPP; + } + if (dest_chain > max_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + + if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_and_fwd_to_table)) { + NL_SET_ERR_MSG_MOD(extack, + "Goto chain is not allowed if action has reformat or decap"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int verify_uplink_forwarding(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_rep_priv *rep_priv; + + /* Forwarding non encapsulated traffic between + * uplink ports is allowed only if + * termination_table_raw_traffic cap is set. + * + * Input vport was stored esw_attr->in_rep. + * In LAG case, *priv* is the private data of + * uplink which may be not the input vport. + */ + rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep); + + if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && + mlx5e_eswitch_uplink_rep(out_dev))) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, + termination_table_raw_traffic)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are both uplink, can't offload forwarding"); + pr_err("devices %s %s are both uplink, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EOPNOTSUPP; + } else if (out_dev != rep_priv->netdev) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not the same uplink, can't offload forwarding"); + pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EOPNOTSUPP; + } + return 0; +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, @@ -3328,13 +3696,17 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; + bool encap = false, decap = false; + u32 action = attr->action; int err, i, if_count = 0; - bool encap = false; - u32 action = 0; if (!flow_action_has_entries(flow_action)) return -EINVAL; + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_DROP: @@ -3344,7 +3716,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, - parse_attr, hdrs, extack); + hdrs, extack); if (err) return err; @@ -3382,8 +3754,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { NL_SET_ERR_MSG_MOD(extack, "can't support more output ports, can't offload forwarding"); - pr_err("can't support more than %d output ports, can't offload forwarding\n", - attr->out_count); + netdev_warn(priv->netdev, + "can't support more than %d output ports, can't offload forwarding\n", + attr->out_count); return -EOPNOTSUPP; } @@ -3441,11 +3814,17 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return err; } + err = verify_uplink_forwarding(priv, flow, out_dev, extack); + if (err) + return err; + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); - pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", - priv->netdev->name, out_dev->name); + netdev_warn(priv->netdev, + "devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, + out_dev->name); return -EOPNOTSUPP; } @@ -3464,8 +3843,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } else { NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); - pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", - priv->netdev->name, out_dev->name); + netdev_warn(priv->netdev, + "devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, + out_dev->name); return -EINVAL; } } @@ -3507,28 +3888,24 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->split_count = attr->out_count; break; case FLOW_ACTION_TUNNEL_DECAP: - action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + decap = true; break; - case FLOW_ACTION_GOTO: { - u32 dest_chain = act->chain_index; - u32 max_chain = mlx5_esw_chains_get_chain_range(esw); + case FLOW_ACTION_GOTO: + err = mlx5_validate_goto_chain(esw, flow, act, action, + extack); + if (err) + return err; - if (ft_flow) { - NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); - return -EOPNOTSUPP; - } - if (dest_chain <= attr->chain) { - NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported"); - return -EOPNOTSUPP; - } - if (dest_chain > max_chain) { - NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range"); - return -EOPNOTSUPP; - } action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->dest_chain = dest_chain; + attr->dest_chain = act->chain_index; + break; + case FLOW_ACTION_CT: + err = mlx5_tc_ct_parse_action(priv, attr, act, extack); + if (err) + return err; + + flow_flag_set(flow, CT); break; - } default: NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); return -EOPNOTSUPP; @@ -3557,9 +3934,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, * flag. we might have set split_count either by pedit or * pop/push. if there is no pop/push either, reset it too. */ - if (parse_attr->num_mod_hdr_actions == 0) { + if (parse_attr->mod_hdr_acts.num_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) attr->split_count = 0; @@ -3571,8 +3948,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; if (attr->dest_chain) { + if (decap) { + /* It can be supported if we'll create a mapping for + * the tunnel device only (without tunnel), and set + * this tunnel id with this decap flow. + * + * On restore (miss), we'll just set this saved tunnel + * device. + */ + + NL_SET_ERR_MSG(extack, + "Decap with goto isn't supported"); + netdev_warn(priv->netdev, + "Decap with goto isn't supported"); + return -EOPNOTSUPP; + } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); + NL_SET_ERR_MSG_MOD(extack, + "Mirroring goto chain rules isn't supported"); return -EOPNOTSUPP; } attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; @@ -3580,7 +3974,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (!(attr->action & (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { - NL_SET_ERR_MSG(extack, "Rule must have at least one forward/drop action"); + NL_SET_ERR_MSG_MOD(extack, + "Rule must have at least one forward/drop action"); return -EOPNOTSUPP; } @@ -3751,6 +4146,10 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; + err = mlx5_tc_ct_parse_match(priv, &parse_attr->spec, f, extack); + if (err) + goto err_free; + err = mlx5e_tc_add_fdb_flow(priv, flow, extack); complete_all(&flow->init_done); if (err) { @@ -4035,7 +4434,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, goto errout; } - if (mlx5e_is_offloaded_flow(flow)) { + if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) { counter = mlx5e_tc_get_counter(flow); if (!counter) goto errout; @@ -4069,7 +4468,8 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, no_peer_counter: mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); out: - flow_stats_update(&f->stats, bytes, packets, lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse, + FLOW_ACTION_HW_STATS_DELAYED); trace_mlx5e_stats_flower(f); errout: mlx5e_flow_put(priv, flow); @@ -4126,6 +4526,9 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (!flow_action_basic_hw_stats_check(flow_action, extack)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_POLICE: @@ -4147,8 +4550,14 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *ma) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = ma->common.extack; + if (!mlx5_esw_qos_enabled(esw)) { + NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device"); + return -EOPNOTSUPP; + } + if (ma->common.prio != 1) { NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); return -EINVAL; @@ -4177,7 +4586,8 @@ void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; rpriv->prev_vf_vport_stats = cur_stats; - flow_stats_update(&ma->stats, dpkts, dbytes, jiffies); + flow_stats_update(&ma->stats, dpkts, dbytes, jiffies, + FLOW_ACTION_HW_STATS_DELAYED); } static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, @@ -4295,12 +4705,63 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) int mlx5e_tc_esw_init(struct rhashtable *tc_ht) { - return rhashtable_init(tc_ht, &tc_ht_params); + const size_t sz_enc_opts = sizeof(struct flow_dissector_key_enc_opts); + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *priv; + struct mapping_ctx *mapping; + int err; + + uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + + err = mlx5_tc_ct_init(uplink_priv); + if (err) + goto err_ct; + + mapping = mapping_create(sizeof(struct tunnel_match_key), + TUNNEL_INFO_BITS_MASK, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_tun_mapping; + } + uplink_priv->tunnel_mapping = mapping; + + mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_enc_opts_mapping; + } + uplink_priv->tunnel_enc_opts_mapping = mapping; + + err = rhashtable_init(tc_ht, &tc_ht_params); + if (err) + goto err_ht_init; + + return err; + +err_ht_init: + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); +err_enc_opts_mapping: + mapping_destroy(uplink_priv->tunnel_mapping); +err_tun_mapping: + mlx5_tc_ct_clean(uplink_priv); +err_ct: + netdev_warn(priv->netdev, + "Failed to initialize tc (eswitch), err: %d", err); + return err; } void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) { + struct mlx5_rep_uplink_priv *uplink_priv; + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); + + uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); + mapping_destroy(uplink_priv->tunnel_mapping); + + mlx5_tc_ct_clean(uplink_priv); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) @@ -4332,3 +4793,147 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) } mutex_unlock(&rpriv->unready_flows_lock); } + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct flow_dissector_key_enc_opts enc_opts = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct metadata_dst *tun_dst; + struct tunnel_match_key key; + u32 tun_id, enc_opts_id; + struct net_device *dev; + int err; + + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; + tun_id = tunnel_id >> ENC_OPTS_BITS; + + if (!tun_id) + return true; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); + if (err) { + WARN_ON_ONCE(true); + netdev_dbg(priv->netdev, + "Couldn't find tunnel for tun_id: %d, err: %d\n", + tun_id, err); + return false; + } + + if (enc_opts_id) { + err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id, &enc_opts); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", + enc_opts_id, err); + return false; + } + } + + tun_dst = tun_rx_dst(enc_opts.len); + if (!tun_dst) { + WARN_ON_ONCE(true); + return false; + } + + ip_tunnel_key_init(&tun_dst->u.tun_info.key, + key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + 0, /* label */ + key.enc_tp.src, key.enc_tp.dst, + key32_to_tunnel_id(key.enc_key_id.keyid), + TUNNEL_KEY); + + if (enc_opts.len) + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.data, + enc_opts.len, enc_opts.dst_opt_type); + + skb_dst_set(skb, (struct dst_entry *)tun_dst); + dev = dev_get_by_index(&init_net, key.filter_ifindex); + if (!dev) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel device with ifindex: %d\n", + key.filter_ifindex); + return false; + } + + /* Set tun_dev so we do dev_put() after datapath */ + tc_priv->tun_dev = dev; + + skb->dev = dev; + + return true; +} +#endif /* CONFIG_NET_TC_SKB_EXT */ + +bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tc_skb_ext *tc_skb_ext; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + int tunnel_moffset; + int err; + + reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); + if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) + reg_c0 = 0; + reg_c1 = be32_to_cpu(cqe->imm_inval_pkey); + + if (!reg_c0) + return true; + + priv = netdev_priv(skb->dev); + esw = priv->mdev->priv.eswitch; + + err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find chain for chain tag: %d, err: %d\n", + reg_c0, err); + return false; + } + + if (chain) { + tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } + + tc_skb_ext->chain = chain; + + tuple_id = reg_c1 & TUPLE_ID_MAX; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id)) + return false; + } + + tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset; + tunnel_id = reg_c1 >> (8 * tunnel_moffset); + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); +#endif /* CONFIG_NET_TC_SKB_EXT */ + + return true; +} + +void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) +{ + if (tc_priv->tun_dev) + dev_put(tc_priv->tun_dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 262cdb7b69b1..abdcfa4c4e0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -91,9 +91,63 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); +enum mlx5e_tc_attr_to_reg { + CHAIN_TO_REG, + TUNNEL_TO_REG, + CTSTATE_TO_REG, + ZONE_TO_REG, + MARK_TO_REG, + LABELS_TO_REG, + FTEID_TO_REG, + TUPLEID_TO_REG, +}; + +struct mlx5e_tc_attr_to_reg_mapping { + int mfield; /* rewrite field */ + int moffset; /* offset of mfield */ + int mlen; /* bytes to rewrite/match */ + + int soffset; /* offset of spec for match */ +}; + +extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[]; + bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, struct net_device *out_dev); +struct mlx5e_tc_update_priv { + struct net_device *tun_dev; +}; + +bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv); + +void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv); + +struct mlx5e_tc_mod_hdr_acts { + int num_actions; + int max_actions; + void *actions; +}; + +int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + u32 data); + +void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask); + +int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, + int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); + +struct mlx5e_tc_flow; +u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index ee60383adc5b..fd6b2a1898c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -72,8 +72,8 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, { int txq_ix = netdev_pick_tx(dev, skb, NULL); struct mlx5e_priv *priv = netdev_priv(dev); - u16 num_channels; int up = 0; + int ch_ix; if (!netdev_get_num_tc(dev)) return txq_ix; @@ -86,14 +86,13 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, if (skb_vlan_tag_present(skb)) up = skb_vlan_tag_get_prio(skb); - /* txq_ix can be larger than num_channels since - * dev->num_real_tx_queues = num_channels * num_tc + /* Normalize any picked txq_ix to [0, num_channels), + * So we can return a txq_ix that matches the channel and + * packet UP. */ - num_channels = priv->channels.params.num_channels; - if (txq_ix >= num_channels) - txq_ix = priv->txq2sq[txq_ix]->ch_ix; + ch_ix = priv->txq2sq[txq_ix]->ch_ix; - return priv->channel_tc2realtxq[txq_ix][up]; + return priv->channel_tc2realtxq[ch_ix][up]; } static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 800d34ed8a96..87c49e7a164c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -31,6 +31,7 @@ */ #include <linux/irq.h> +#include <linux/indirect_call_wrapper.h> #include "en.h" #include "en/xdp.h" #include "en/xsk/rx.h" @@ -100,7 +101,10 @@ static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskr busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); mlx5e_xsk_update_tx_wakeup(xsksq); - xsk_rx_alloc_err = xskrq->post_wqes(xskrq); + xsk_rx_alloc_err = INDIRECT_CALL_2(xskrq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + xskrq); busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err); return busy_xsk; @@ -143,7 +147,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_poll_ico_cq(&c->icosq.cq); - busy |= rq->post_wqes(rq); + busy |= INDIRECT_CALL_2(rq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + rq); if (xsk_open) { mlx5e_poll_ico_cq(&c->xskicosq.cq); busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile new file mode 100644 index 000000000000..c78512eed8d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c index 4276194b633f..029001040737 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c @@ -5,23 +5,26 @@ #include <linux/mlx5/mlx5_ifc.h> #include <linux/mlx5/fs.h> -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" +#include "en/mapping.h" #include "mlx5_core.h" #include "fs_core.h" #include "eswitch.h" #include "en.h" +#include "en_tc.h" #define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) #define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) #define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht) +#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping) #define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) #define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) #define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) #define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) #define fdb_ignore_flow_level_supported(esw) \ (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) - -#define ESW_OFFLOADS_NUM_GROUPS 4 +#define fdb_modify_header_fwd_to_table_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) /* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated @@ -36,6 +39,7 @@ static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, 1 * 1024 * 1024, 64 * 1024, 128 }; +#define ESW_FT_TBL_SZ (64 * 1024) struct mlx5_esw_chains_priv { struct rhashtable chains_ht; @@ -44,6 +48,7 @@ struct mlx5_esw_chains_priv { struct mutex lock; struct mlx5_flow_table *tc_end_fdb; + struct mapping_ctx *chains_mapping; int fdb_left[ARRAY_SIZE(ESW_POOLS)]; }; @@ -54,9 +59,12 @@ struct fdb_chain { u32 chain; int ref; + int id; struct mlx5_eswitch *esw; struct list_head prios_list; + struct mlx5_flow_handle *restore_rule; + struct mlx5_modify_hdr *miss_modify_hdr; }; struct fdb_prio_key { @@ -99,6 +107,12 @@ bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; } +bool mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw) +{ + return mlx5_esw_chains_prios_supported(esw) && + fdb_ignore_flow_level_supported(esw); +} + u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) { if (!mlx5_esw_chains_prios_supported(esw)) @@ -198,7 +212,9 @@ mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - sz = mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); + sz = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? + mlx5_esw_chains_get_avail_sz_from_pool(esw, ESW_FT_TBL_SZ) : + mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); if (!sz) return ERR_PTR(-ENOSPC); ft_attr.max_fte = sz; @@ -234,7 +250,7 @@ mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, } ft_attr.autogroup.num_reserved_entries = 2; - ft_attr.autogroup.max_num_groups = ESW_OFFLOADS_NUM_GROUPS; + ft_attr.autogroup.max_num_groups = esw->params.large_group_num; fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(fdb)) { esw_warn(esw->dev, @@ -255,6 +271,83 @@ mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw, mlx5_destroy_flow_table(fdb); } +static int +create_fdb_chain_restore(struct fdb_chain *fdb_chain) +{ + char modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)]; + struct mlx5_eswitch *esw = fdb_chain->esw; + struct mlx5_modify_hdr *mod_hdr; + u32 index; + int err; + + if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw) || + !mlx5_esw_chains_prios_supported(esw)) + return 0; + + err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index); + if (err) + return err; + if (index == MLX5_FS_DEFAULT_FLOW_TAG) { + /* we got the special default flow tag id, so we won't know + * if we actually marked the packet with the restore rule + * we create. + * + * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. + */ + err = mapping_add(esw_chains_mapping(esw), + &fdb_chain->chain, &index); + mapping_remove(esw_chains_mapping(esw), + MLX5_FS_DEFAULT_FLOW_TAG); + if (err) + return err; + } + + fdb_chain->id = index; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield); + MLX5_SET(set_action_in, modact, offset, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8); + MLX5_SET(set_action_in, modact, length, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8); + MLX5_SET(set_action_in, modact, data, fdb_chain->id); + mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + 1, modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + fdb_chain->miss_modify_hdr = mod_hdr; + + fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id); + if (IS_ERR(fdb_chain->restore_rule)) { + err = PTR_ERR(fdb_chain->restore_rule); + goto err_rule; + } + + return 0; + +err_rule: + mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); +err_mod_hdr: + /* Datapath can't find this mapping, so we can safely remove it */ + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); + return err; +} + +static void destroy_fdb_chain_restore(struct fdb_chain *fdb_chain) +{ + struct mlx5_eswitch *esw = fdb_chain->esw; + + if (!fdb_chain->miss_modify_hdr) + return; + + mlx5_del_flow_rules(fdb_chain->restore_rule); + mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); +} + static struct fdb_chain * mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) { @@ -269,6 +362,10 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) fdb_chain->chain = chain; INIT_LIST_HEAD(&fdb_chain->prios_list); + err = create_fdb_chain_restore(fdb_chain); + if (err) + goto err_restore; + err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node, chain_params); if (err) @@ -277,6 +374,8 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) return fdb_chain; err_insert: + destroy_fdb_chain_restore(fdb_chain); +err_restore: kvfree(fdb_chain); return ERR_PTR(err); } @@ -288,6 +387,8 @@ mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, chain_params); + + destroy_fdb_chain_restore(fdb_chain); kvfree(fdb_chain); } @@ -310,10 +411,11 @@ mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain) } static struct mlx5_flow_handle * -mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb, +mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain, + struct mlx5_flow_table *fdb, struct mlx5_flow_table *next_fdb) { - static const struct mlx5_flow_spec spec = {}; + struct mlx5_eswitch *esw = fdb_chain->esw; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act act = {}; @@ -322,7 +424,13 @@ mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb, dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = next_fdb; - return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1); + if (next_fdb == tc_end_fdb(esw) && + mlx5_esw_chains_prios_supported(esw)) { + act.modify_hdr = fdb_chain->miss_modify_hdr; + act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + return mlx5_add_flow_rules(fdb, NULL, &act, &dest, 1); } static int @@ -345,7 +453,8 @@ mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio, list_for_each_entry_continue_reverse(pos, &fdb_chain->prios_list, list) { - miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb, + miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain, + pos->fdb, next_fdb); if (IS_ERR(miss_rules[n])) { err = PTR_ERR(miss_rules[n]); @@ -459,7 +568,7 @@ mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw, } /* Add miss rule to next_fdb */ - miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb); + miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb); if (IS_ERR(miss_rule)) { err = PTR_ERR(miss_rule); goto err_miss_rule; @@ -618,12 +727,44 @@ mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) return tc_end_fdb(esw); } +struct mlx5_flow_table * +mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw) +{ + u32 chain, prio, level; + int err; + + if (!fdb_ignore_flow_level_supported(esw)) { + err = -EOPNOTSUPP; + + esw_warn(esw->dev, + "Couldn't create global flow table, ignore_flow_level not supported."); + goto err_ignore; + } + + chain = mlx5_esw_chains_get_chain_range(esw), + prio = mlx5_esw_chains_get_prio_range(esw); + level = mlx5_esw_chains_get_level_range(esw); + + return mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); + +err_ignore: + return ERR_PTR(err); +} + +void +mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft) +{ + mlx5_esw_chains_destroy_fdb_table(esw, ft); +} + static int mlx5_esw_chains_init(struct mlx5_eswitch *esw) { struct mlx5_esw_chains_priv *chains_priv; struct mlx5_core_dev *dev = esw->dev; u32 max_flow_counter, fdb_max; + struct mapping_ctx *mapping; int err; chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); @@ -637,7 +778,7 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw) esw_debug(dev, "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n", - max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, fdb_max); + max_flow_counter, esw->params.large_group_num, fdb_max); mlx5_esw_chains_init_sz_pool(esw); @@ -645,6 +786,16 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw) esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported\n"); + } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { + /* Disabled when ttl workaround is needed, e.g + * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig + */ + esw_warn(dev, + "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; } else { esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", @@ -660,10 +811,20 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw) if (err) goto init_prios_ht_err; + mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw), + true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto mapping_err; + } + esw_chains_mapping(esw) = mapping; + mutex_init(&esw_chains_lock(esw)); return 0; +mapping_err: + rhashtable_destroy(&esw_prios_ht(esw)); init_prios_ht_err: rhashtable_destroy(&esw_chains_ht(esw)); init_chains_ht_err: @@ -675,6 +836,7 @@ static void mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) { mutex_destroy(&esw_chains_lock(esw)); + mapping_destroy(esw_chains_mapping(esw)); rhashtable_destroy(&esw_prios_ht(esw)); rhashtable_destroy(&esw_chains_ht(esw)); @@ -704,12 +866,9 @@ mlx5_esw_chains_open(struct mlx5_eswitch *esw) /* Open level 1 for split rules now if prios isn't supported */ if (!mlx5_esw_chains_prios_supported(esw)) { - ft = mlx5_esw_chains_get_table(esw, 0, 1, 1); - - if (IS_ERR(ft)) { - err = PTR_ERR(ft); + err = mlx5_esw_vport_tbl_get(esw); + if (err) goto level_1_err; - } } return 0; @@ -725,7 +884,7 @@ static void mlx5_esw_chains_close(struct mlx5_eswitch *esw) { if (!mlx5_esw_chains_prios_supported(esw)) - mlx5_esw_chains_put_table(esw, 0, 1, 1); + mlx5_esw_vport_tbl_put(esw); mlx5_esw_chains_put_table(esw, 0, 1, 0); mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); } @@ -756,3 +915,30 @@ mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) mlx5_esw_chains_close(esw); mlx5_esw_chains_cleanup(esw); } + +int +mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, + u32 *chain_mapping) +{ + return mapping_add(esw_chains_mapping(esw), &chain, chain_mapping); +} + +int +mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, u32 chain_mapping) +{ + return mapping_remove(esw_chains_mapping(esw), chain_mapping); +} + +int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, + u32 *chain) +{ + int err; + + err = mapping_find(esw_chains_mapping(esw), tag, chain); + if (err) { + esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag); + return -ENOENT; + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h index 2e13097fe348..f8c4239846ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h @@ -4,8 +4,12 @@ #ifndef __ML5_ESW_CHAINS_H__ #define __ML5_ESW_CHAINS_H__ +#include "eswitch.h" + bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw); +bool +mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw); u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw); u32 @@ -23,8 +27,23 @@ mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, struct mlx5_flow_table * mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw); +struct mlx5_flow_table * +mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw); +void +mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft); + +int +mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, + u32 *chain_mapping); +int +mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, + u32 chain_mapping); + int mlx5_esw_chains_create(struct mlx5_eswitch *esw); void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); -#endif /* __ML5_ESW_CHAINS_H__ */ +int +mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain); +#endif /* __ML5_ESW_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index e49acd0c5da5..7f618a443bfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -39,6 +39,7 @@ #include "lib/eq.h" #include "eswitch.h" #include "fs_core.h" +#include "devlink.h" #include "ecpf.h" enum { @@ -1333,7 +1334,6 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, goto out; } - memset(spec, 0, sizeof(*spec)); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach drop flow counter */ @@ -1345,7 +1345,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, dest_num++; } vport->ingress.legacy.drop_rule = - mlx5_add_flow_rules(vport->ingress.acl, spec, + mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, dst, dest_num); if (IS_ERR(vport->ingress.legacy.drop_rule)) { err = PTR_ERR(vport->ingress.legacy.drop_rule); @@ -1408,7 +1408,6 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_flow_destination drop_ctr_dst = {0}; struct mlx5_flow_destination *dst = NULL; struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_spec *spec; int dest_num = 0; int err = 0; @@ -1437,11 +1436,6 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, if (err) return err; - /* Drop others rule (star rule) */ - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - goto out; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach egress drop flow counter */ @@ -1453,7 +1447,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, dest_num++; } vport->egress.legacy.drop_rule = - mlx5_add_flow_rules(vport->egress.acl, spec, + mlx5_add_flow_rules(vport->egress.acl, NULL, &flow_act, dst, dest_num); if (IS_ERR(vport->egress.legacy.drop_rule)) { err = PTR_ERR(vport->egress.legacy.drop_rule); @@ -1462,8 +1456,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, vport->vport, err); vport->egress.legacy.drop_rule = NULL; } -out: - kvfree(spec); + return err; } @@ -1669,34 +1662,6 @@ static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) ((u8 *)node_guid)[0] = mac[5]; } -static void esw_apply_vport_conf(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - u16 vport_num = vport->vport; - int flags; - - if (mlx5_esw_is_manager_vport(esw, vport_num)) - return; - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, 1, - vport->info.link_state); - - /* Host PF has its own mac/guid. */ - if (vport_num) { - mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, - vport->info.mac); - mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, - vport->info.node_guid); - } - - flags = (vport->info.vlan || vport->info.qos) ? - SET_VLAN_STRIP | SET_VLAN_INSERT : 0; - modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, - flags); -} - static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1706,8 +1671,7 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, if (mlx5_esw_is_manager_vport(esw, vport->vport)) return 0; - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && - MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { + if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { vport->ingress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); if (IS_ERR(vport->ingress.legacy.drop_counter)) { esw_warn(esw->dev, @@ -1721,8 +1685,7 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, if (ret) goto ingress_err; - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && - MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { + if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { vport->egress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); if (IS_ERR(vport->egress.legacy.drop_counter)) { esw_warn(esw->dev, @@ -1783,29 +1746,75 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, esw_vport_destroy_offloads_acl_tables(esw, vport); } -static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - enum mlx5_eswitch_vport_event enabled_events) +static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + u16 vport_num = vport->vport; + int flags; + int err; + + err = esw_vport_setup_acl(esw, vport); + if (err) + return err; + + /* Attach vport to the eswitch rate limiter */ + esw_vport_enable_qos(esw, vport, vport->info.max_rate, vport->qos.bw_share); + + if (mlx5_esw_is_manager_vport(esw, vport_num)) + return 0; + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, 1, + vport->info.link_state); + + /* Host PF has its own mac/guid. */ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + + flags = (vport->info.vlan || vport->info.qos) ? + SET_VLAN_STRIP | SET_VLAN_INSERT : 0; + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, + vport->info.qos, flags); + + return 0; +} + +/* Don't cleanup vport->info, it's needed to restore vport configuration */ +static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u16 vport_num = vport->vport; + + if (!mlx5_esw_is_manager_vport(esw, vport_num)) + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); + + esw_vport_disable_qos(esw, vport); + esw_vport_cleanup_acl(esw, vport); +} + +static int esw_enable_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; int ret; + vport = mlx5_eswitch_get_vport(esw, vport_num); + mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - /* Restore old vport configuration */ - esw_apply_vport_conf(esw, vport); - - ret = esw_vport_setup_acl(esw, vport); + ret = esw_vport_setup(esw, vport); if (ret) goto done; - /* Attach vport to the eswitch rate limiter */ - if (esw_vport_enable_qos(esw, vport, vport->info.max_rate, - vport->qos.bw_share)) - esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); - /* Sync with current vport context */ vport->enabled_events = enabled_events; vport->enabled = true; @@ -1826,10 +1835,11 @@ done: return ret; } -static void esw_disable_vport(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static void esw_disable_vport(struct mlx5_eswitch *esw, u16 vport_num) { - u16 vport_num = vport->vport; + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); mutex_lock(&esw->state_lock); if (!vport->enabled) @@ -1847,16 +1857,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; - esw_vport_disable_qos(esw, vport); - - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && - esw->mode == MLX5_ESWITCH_LEGACY) - mlx5_modify_vport_admin_state(esw->dev, - MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, 1, - MLX5_VPORT_ADMIN_STATE_DOWN); - - esw_vport_cleanup_acl(esw, vport); + esw_vport_cleanup(esw, vport); esw->enabled_vports--; done: @@ -1944,6 +1945,59 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) +int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + int err; + + err = esw_enable_vport(esw, vport_num, enabled_events); + if (err) + return err; + + err = esw_offloads_load_rep(esw, vport_num); + if (err) + goto err_rep; + + return err; + +err_rep: + esw_disable_vport(esw, vport_num); + return err; +} + +void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + esw_offloads_unload_rep(esw, vport_num); + esw_disable_vport(esw, vport_num); +} + +void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) +{ + int i; + + mlx5_esw_for_each_vf_vport_num_reverse(esw, i, num_vfs) + mlx5_eswitch_unload_vport(esw, i); +} + +int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, + enum mlx5_eswitch_vport_event enabled_events) +{ + int err; + int i; + + mlx5_esw_for_each_vf_vport_num(esw, i, num_vfs) { + err = mlx5_eswitch_load_vport(esw, i, enabled_events); + if (err) + goto vf_err; + } + + return 0; + +vf_err: + mlx5_eswitch_unload_vf_vports(esw, i - 1); + return err; +} + /* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs * whichever are present on the eswitch. */ @@ -1951,46 +2005,33 @@ int mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enum mlx5_eswitch_vport_event enabled_events) { - struct mlx5_vport *vport; - int num_vfs; int ret; - int i; /* Enable PF vport */ - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - ret = esw_enable_vport(esw, vport, enabled_events); + ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events); if (ret) return ret; /* Enable ECPF vport */ if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - ret = esw_enable_vport(esw, vport, enabled_events); + ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; } /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { - ret = esw_enable_vport(esw, vport, enabled_events); - if (ret) - goto vf_err; - } + ret = mlx5_eswitch_load_vf_vports(esw, esw->esw_funcs.num_vfs, + enabled_events); + if (ret) + goto vf_err; return 0; vf_err: - num_vfs = i - 1; - mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, num_vfs) - esw_disable_vport(esw, vport); - - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - esw_disable_vport(esw, vport); - } + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); ecpf_err: - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - esw_disable_vport(esw, vport); + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); return ret; } @@ -1999,19 +2040,81 @@ ecpf_err: */ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { - struct mlx5_vport *vport; - int i; + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - mlx5_esw_for_all_vports_reverse(esw, i, vport) - esw_disable_vport(esw, vport); + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + + mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); } -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) +static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) { + struct devlink *devlink = priv_to_devlink(esw->dev); + union devlink_param_value val; int err; - if (!ESW_ALLOWED(esw) || - !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + err = devlink_param_driverinit_value_get(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + &val); + if (!err) { + esw->params.large_group_num = val.vu32; + } else { + esw_warn(esw->dev, + "Devlink can't get param fdb_large_groups, uses default (%d).\n", + ESW_OFFLOADS_DEFAULT_NUM_GROUPS); + esw->params.large_group_num = ESW_OFFLOADS_DEFAULT_NUM_GROUPS; + } +} + +static void +mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) +{ + const u32 *out; + + WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); + + if (num_vfs < 0) + return; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { + esw->esw_funcs.num_vfs = num_vfs; + return; + } + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + return; + + esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + kvfree(out); +} + +/** + * mlx5_eswitch_enable_locked - Enable eswitch + * @esw: Pointer to eswitch + * @mode: Eswitch mode to enable + * @num_vfs: Enable eswitch for given number of VFs. This is optional. + * Valid value are 0, > 0 and MLX5_ESWITCH_IGNORE_NUM_VFS. + * Caller should pass num_vfs > 0 when enabling eswitch for + * vf vports. Caller should pass num_vfs = 0, when eswitch + * is enabled without sriov VFs or when caller + * is unaware of the sriov state of the host PF on ECPF based + * eswitch. Caller should pass < 0 when num_vfs should be + * completely ignored. This is typically the case when eswitch + * is enabled without sriov regardless of PF/ECPF system. + * mlx5_eswitch_enable_locked() Enables eswitch in either legacy or offloads + * mode. If num_vfs >=0 is provided, it setup VF related eswitch vports. + * It returns 0 on success or error code on failure. + */ +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) +{ + int err; + + lockdep_assert_held(&esw->mode_lock); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; } @@ -2022,6 +2125,10 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) esw_warn(esw->dev, "engress ACL is not supported by FW\n"); + mlx5_eswitch_get_devlink_param(esw); + + mlx5_eswitch_update_num_of_vfs(esw, num_vfs); + esw_create_tsar(esw); esw->mode = mode; @@ -2058,11 +2165,34 @@ abort: return err; } -void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) +/** + * mlx5_eswitch_enable - Enable eswitch + * @esw: Pointer to eswitch + * @num_vfs: Enable eswitch swich for given number of VFs. + * Caller must pass num_vfs > 0 when enabling eswitch for + * vf vports. + * mlx5_eswitch_enable() returns 0 on success or error code on failure. + */ +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) +{ + int ret; + + if (!ESW_ALLOWED(esw)) + return 0; + + mutex_lock(&esw->mode_lock); + ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); + mutex_unlock(&esw->mode_lock); + return ret; +} + +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) { int old_mode; - if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) + lockdep_assert_held_write(&esw->mode_lock); + + if (esw->mode == MLX5_ESWITCH_NONE) return; esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", @@ -2091,6 +2221,16 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) mlx5_eswitch_clear_vf_vports_info(esw); } +void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) +{ + if (!ESW_ALLOWED(esw)) + return; + + mutex_lock(&esw->mode_lock); + mlx5_eswitch_disable_locked(esw, clear_vf); + mutex_unlock(&esw->mode_lock); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw; @@ -2142,6 +2282,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr.hlist); atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock); + mutex_init(&esw->mode_lock); mlx5_esw_for_all_vports(esw, i, vport) { vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); @@ -2176,6 +2317,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); esw_offloads_cleanup_reps(esw); + mutex_destroy(&esw->mode_lock); + mutex_destroy(&esw->state_lock); mutex_destroy(&esw->offloads.mod_hdr.lock); mutex_destroy(&esw->offloads.encap_tbl_lock); kfree(esw->vports); @@ -2410,12 +2553,11 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, } /* Star rule to forward all traffic to uplink vport */ - memset(spec, 0, sizeof(*spec)); memset(&dest, 0, sizeof(dest)); dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport.num = MLX5_VPORT_UPLINK; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -2600,9 +2742,13 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, u64 bytes = 0; int err = 0; - if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY) + if (esw->mode != MLX5_ESWITCH_LEGACY) return 0; + mutex_lock(&esw->state_lock); + if (!vport->enabled) + goto unlock; + if (vport->egress.legacy.drop_counter) mlx5_fc_query(dev, vport->egress.legacy.drop_counter, &stats->rx_dropped, &bytes); @@ -2613,20 +2759,22 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) - return 0; + goto unlock; err = mlx5_query_vport_down_stats(dev, vport->vport, 1, &rx_discard_vport_down, &tx_discard_vport_down); if (err) - return err; + goto unlock; if (MLX5_CAP_GEN(dev, receive_discard_vport_down)) stats->rx_dropped += rx_discard_vport_down; if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) stats->tx_dropped += tx_discard_vport_down; - return 0; +unlock: + mutex_unlock(&esw->state_lock); + return err; } int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, @@ -2742,22 +2890,4 @@ bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); } -void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) -{ - const u32 *out; - - WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); - - if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { - esw->esw_funcs.num_vfs = num_vfs; - return; - } - - out = mlx5_esw_query_functions(esw->dev); - if (IS_ERR(out)) - return; - esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, - host_params_context.host_num_of_vfs); - kvfree(out); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4472710ccc9c..39f42f985fbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -42,6 +42,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "lib/mpfs.h" +#include "en/tc_ct.h" #define FDB_TC_MAX_CHAIN 3 #define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1) @@ -55,6 +56,8 @@ #ifdef CONFIG_MLX5_ESWITCH +#define ESW_OFFLOADS_DEFAULT_NUM_GROUPS 15 + #define MLX5_MAX_UC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) @@ -183,12 +186,22 @@ struct mlx5_eswitch_fdb { int vlan_push_pop_refcount; struct mlx5_esw_chains_priv *esw_chains_priv; + struct { + DECLARE_HASHTABLE(table, 8); + /* Protects vports.table */ + struct mutex lock; + } vports; + } offloads; }; u32 flags; }; struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads_restore; + struct mlx5_flow_group *restore_group; + struct mlx5_modify_hdr *restore_copy_hdr_id; + struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; @@ -224,6 +237,7 @@ struct mlx5_esw_functions { enum { MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), + MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED = BIT(1), }; struct mlx5_eswitch { @@ -244,6 +258,11 @@ struct mlx5_eswitch { */ struct mutex state_lock; + /* Protects eswitch mode change that occurs via one or more + * user commands, i.e. sriov state change, devlink commands. + */ + struct mutex mode_lock; + struct { bool enabled; u32 root_tsar_id; @@ -255,6 +274,9 @@ struct mlx5_eswitch { u16 manager_vport; u16 first_host_vport; struct mlx5_esw_functions esw_funcs; + struct { + u32 large_group_num; + } params; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -279,7 +301,11 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode); + +#define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs); +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf); void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, u8 mac[ETH_ALEN]); @@ -315,6 +341,7 @@ struct mlx5_termtbl_handle; bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec); @@ -375,6 +402,8 @@ enum { enum { MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), + MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ESW_ATTR_FLAG_HAIRPIN = BIT(3), }; struct mlx5_esw_flow_attr { @@ -405,6 +434,9 @@ struct mlx5_esw_flow_attr { u16 prio; u32 dest_chain; u32 flags; + struct mlx5_flow_table *fdb; + struct mlx5_flow_table *dest_ft; + struct mlx5_ct_attr ct_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; }; @@ -414,7 +446,6 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode); int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack); @@ -433,6 +464,11 @@ int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, struct mlx5_vport *vport, u16 vlan_id, u32 flow_action); +static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw) +{ + return esw->qos.enabled; +} + static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { @@ -608,7 +644,6 @@ mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); -void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); int @@ -623,11 +658,30 @@ void esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw); +void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw); + +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag); +u32 +esw_get_max_restore_tag(struct mlx5_eswitch *esw); + +int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); +void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; } +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } @@ -636,8 +690,11 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(-EOPNOTSUPP); } -static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} - +static inline struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1a57b2bd74b8..f171eb2234b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,7 +37,7 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@ -50,6 +50,181 @@ #define MLX5_ESW_MISS_FLOWS (2) #define UPLINK_REP_INDEX 0 +/* Per vport tables */ + +#define MLX5_ESW_VPORT_TABLE_SIZE 128 + +/* This struct is used as a key to the hash table and we need it to be packed + * so hash result is consistent + */ +struct mlx5_vport_key { + u32 chain; + u16 prio; + u16 vport; + u16 vhca_id; +} __packed; + +struct mlx5_vport_table { + struct hlist_node hlist; + struct mlx5_flow_table *fdb; + u32 num_rules; + struct mlx5_vport_key key; +}; + +#define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4 + +static struct mlx5_flow_table * +esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *fdb; + + ft_attr.autogroup.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS; + ft_attr.max_fte = MLX5_ESW_VPORT_TABLE_SIZE; + ft_attr.prio = FDB_PER_VPORT; + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n", + PTR_ERR(fdb)); + } + + return fdb; +} + +static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, + struct mlx5_vport_key *key) +{ + key->vport = attr->in_rep->vport; + key->chain = attr->chain; + key->prio = attr->prio; + key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + return jhash(key, sizeof(*key), 0); +} + +/* caller must hold vports.lock */ +static struct mlx5_vport_table * +esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key) +{ + struct mlx5_vport_table *e; + + hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key) + if (!memcmp(&e->key, skey, sizeof(*skey))) + return e; + + return NULL; +} + +static void +esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_vport_table *e; + struct mlx5_vport_key key; + u32 hkey; + + mutex_lock(&esw->fdb_table.offloads.vports.lock); + hkey = flow_attr_to_vport_key(esw, attr, &key); + e = esw_vport_tbl_lookup(esw, &key, hkey); + if (!e || --e->num_rules) + goto out; + + hash_del(&e->hlist); + mlx5_destroy_flow_table(e->fdb); + kfree(e); +out: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); +} + +static struct mlx5_flow_table * +esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + struct mlx5_vport_table *e; + struct mlx5_vport_key skey; + u32 hkey; + + mutex_lock(&esw->fdb_table.offloads.vports.lock); + hkey = flow_attr_to_vport_key(esw, attr, &skey); + e = esw_vport_tbl_lookup(esw, &skey, hkey); + if (e) { + e->num_rules++; + goto out; + } + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) { + fdb = ERR_PTR(-ENOMEM); + goto err_alloc; + } + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns) { + esw_warn(dev, "Failed to get FDB namespace\n"); + fdb = ERR_PTR(-ENOENT); + goto err_ns; + } + + fdb = esw_vport_tbl_create(esw, ns); + if (IS_ERR(fdb)) + goto err_ns; + + e->fdb = fdb; + e->num_rules = 1; + e->key = skey; + hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey); +out: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); + return e->fdb; + +err_ns: + kfree(e); +err_alloc: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); + return fdb; +} + +int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_flow_attr attr = {}; + struct mlx5_eswitch_rep rep = {}; + struct mlx5_flow_table *fdb; + struct mlx5_vport *vport; + int i; + + attr.prio = 1; + attr.in_rep = &rep; + mlx5_esw_for_all_vports(esw, i, vport) { + attr.in_rep->vport = vport->vport; + fdb = esw_vport_tbl_get(esw, &attr); + if (IS_ERR(fdb)) + goto out; + } + return 0; + +out: + mlx5_esw_vport_tbl_put(esw); + return PTR_ERR(fdb); +} + +void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_flow_attr attr = {}; + struct mlx5_eswitch_rep rep = {}; + struct mlx5_vport *vport; + int i; + + attr.prio = 1; + attr.in_rep = &rep; + mlx5_esw_for_all_vports(esw, i, vport) { + attr.in_rep->vport = vport->vport; + esw_vport_tbl_put(esw, &attr); + } +} + +/* End: Per vport tables */ + static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, u16 vport_num) { @@ -85,7 +260,8 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, attr->in_rep->vport)); misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); @@ -124,6 +300,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, bool split = !!(attr->split_count); struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; + bool hairpin = false; int j, i = 0; if (esw->mode != MLX5_ESWITCH_OFFLOADS) @@ -148,7 +325,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { struct mlx5_flow_table *ft; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { + if (attr->dest_ft) { + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = attr->dest_ft; + i++; + } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw); @@ -191,8 +373,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, i++; } - mlx5_eswitch_set_rule_source_port(esw, spec, attr); - if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; if (attr->inner_match_level != MLX5_MATCH_NONE) @@ -201,27 +381,45 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_hdr = attr->modify_hdr; - fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, - !!split); + if (split) { + fdb = esw_vport_tbl_get(esw, attr); + } else { + if (attr->chain || attr->prio) + fdb = mlx5_esw_chains_get_table(esw, attr->chain, + attr->prio, 0); + else + fdb = attr->fdb; + + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT)) + mlx5_eswitch_set_rule_source_port(esw, spec, attr); + } if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; } - if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec)) + if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) { rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, &flow_act, dest, i); - else + hairpin = true; + } else { rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + } if (IS_ERR(rule)) goto err_add_rule; else atomic64_inc(&esw->offloads.num_flows); + if (hairpin) + attr->flags |= MLX5_ESW_ATTR_FLAG_HAIRPIN; + return rule; err_add_rule: - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, !!split); + if (split) + esw_vport_tbl_put(esw, attr); + else if (attr->chain || attr->prio) + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); err_esw_get: if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain) mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); @@ -247,7 +445,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, goto err_get_fast; } - fwd_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 1); + fwd_fdb = esw_vport_tbl_get(esw, attr); if (IS_ERR(fwd_fdb)) { rule = ERR_CAST(fwd_fdb); goto err_get_fwd; @@ -285,7 +483,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, return rule; add_err: - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1); + esw_vport_tbl_put(esw, attr); err_get_fwd: mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); err_get_fast: @@ -303,20 +501,25 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_del_flow_rules(rule); - /* unref the term table */ - for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { - if (attr->dests[i].termtbl) - mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + if (attr->flags & MLX5_ESW_ATTR_FLAG_HAIRPIN) { + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + } } atomic64_dec(&esw->offloads.num_flows); if (fwd_rule) { - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1); + esw_vport_tbl_put(esw, attr); mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); } else { - mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, - !!split); + if (split) + esw_vport_tbl_put(esw, attr); + else if (attr->chain || attr->prio) + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, + 0); if (attr->dest_chain) mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); } @@ -578,14 +781,21 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } +static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw) +{ + return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_1; +} + static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) { u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; - u8 fdb_to_vport_reg_c_id; + u8 curr, wanted; int err; - if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + if (!mlx5_eswitch_reg_c1_loopback_supported(esw) && + !mlx5_eswitch_vport_match_metadata_enabled(esw)) return 0; err = mlx5_eswitch_query_esw_vport_context(esw->dev, 0, false, @@ -593,22 +803,33 @@ static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) if (err) return err; - fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.fdb_to_vport_reg_c_id); + curr = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + wanted = MLX5_FDB_TO_VPORT_REG_C_0; + if (mlx5_eswitch_reg_c1_loopback_supported(esw)) + wanted |= MLX5_FDB_TO_VPORT_REG_C_1; if (enable) - fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + curr |= wanted; else - fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + curr &= ~wanted; MLX5_SET(modify_esw_vport_context_in, in, - esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + esw_vport_context.fdb_to_vport_reg_c_id, curr); MLX5_SET(modify_esw_vport_context_in, in, field_select.fdb_to_vport_reg_c_id, 1); - return mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, - in, sizeof(in)); + err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, in, + sizeof(in)); + if (!err) { + if (enable && (curr & MLX5_FDB_TO_VPORT_REG_C_1)) + esw->flags |= MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED; + else + esw->flags &= ~MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED; + } + + return err; } static void peer_miss_rules_setup(struct mlx5_eswitch *esw, @@ -621,7 +842,8 @@ static void peer_miss_rules_setup(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; } else { @@ -836,6 +1058,59 @@ out: return err; } +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore; + struct mlx5_flow_context *flow_context; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; + void *misc; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return ERR_PTR(-EOPNOTSUPP); + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_CHAIN_TAG_METADATA_MASK); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_hdr = esw->offloads.restore_copy_hdr_id; + + flow_context = &spec->flow_context; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = tag; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->offloads.ft_offloads; + + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kfree(spec); + + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, + "Failed to create restore rule for tag: %d, err(%d)\n", + tag, (int)PTR_ERR(flow_rule)); + + return flow_rule; +} + +u32 +esw_get_max_restore_tag(struct mlx5_eswitch *esw) +{ + return ESW_CHAIN_TAG_METADATA_MASK; +} + #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -851,8 +1126,9 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_0); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); } else { MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -1057,6 +1333,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) } ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; + ft_attr.prio = 1; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -1134,7 +1411,8 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; } else { @@ -1160,6 +1438,148 @@ out: return flow_rule; } + +static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode) +{ + u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; + struct mlx5_core_dev *dev = esw->dev; + int vport; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (esw->mode == MLX5_ESWITCH_NONE) + return -EOPNOTSUPP; + + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + mlx5_mode = MLX5_INLINE_MODE_NONE; + goto out; + case MLX5_CAP_INLINE_MODE_L2: + mlx5_mode = MLX5_INLINE_MODE_L2; + goto out; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + goto query_vports; + } + +query_vports: + mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); + mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { + mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); + if (prev_mlx5_mode != mlx5_mode) + return -EINVAL; + prev_mlx5_mode = mlx5_mode; + } + +out: + *mode = mlx5_mode; + return 0; +} + +static void esw_destroy_restore_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return; + + mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id); + mlx5_destroy_flow_group(offloads->restore_group); + mlx5_destroy_flow_table(offloads->ft_offloads_restore); +} + +static int esw_create_restore_table(struct mlx5_eswitch *esw) +{ + u8 modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_modify_hdr *mod_hdr; + void *match_criteria, *misc; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto out_free; + } + + ft_attr.max_fte = 1 << ESW_CHAIN_TAG_METADATA_BITS; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + esw_warn(esw->dev, "Failed to create restore table, err %d\n", + err); + goto out_free; + } + + memset(flow_group_in, 0, inlen); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_CHAIN_TAG_METADATA_MASK); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft_attr.max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create restore flow group, err: %d\n", + err); + goto err_group; + } + + MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY); + MLX5_SET(copy_action_in, modact, src_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(copy_action_in, modact, dst_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_B); + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_KERNEL, 1, + modact); + if (IS_ERR(mod_hdr)) { + esw_warn(dev, "Failed to create restore mod header, err: %d\n", + err); + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + + esw->offloads.ft_offloads_restore = ft; + esw->offloads.restore_group = g; + esw->offloads.restore_copy_hdr_id = mod_hdr; + + kvfree(flow_group_in); + + return 0; + +err_mod_hdr: + mlx5_destroy_flow_group(g); +err_group: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + + return err; +} + static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { @@ -1172,13 +1592,14 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw, false); - mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); - err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); + mlx5_eswitch_disable_locked(esw, false); + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, + esw->dev->priv.sriov.num_vfs); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); - err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); + err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to legacy"); @@ -1233,187 +1654,66 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type]->unload(rep); } -static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - struct mlx5_eswitch_rep *rep; - - if (mlx5_ecpf_vport_exists(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); - __esw_offloads_unload_rep(esw, rep, rep_type); - } - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); - __esw_offloads_unload_rep(esw, rep, rep_type); - } - - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - __esw_offloads_unload_rep(esw, rep, rep_type); -} - -static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; int i; - mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports) + mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, esw->esw_funcs.num_vfs) __esw_offloads_unload_rep(esw, rep, rep_type); -} - -static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) -{ - u8 rep_type = NUM_REP_TYPES; - - while (rep_type-- > 0) - __unload_reps_vf_vport(esw, nvports, rep_type); -} - -static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); - - /* Special vports must be the last to unload. */ - __unload_reps_special_vport(esw, rep_type); -} - -static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw) -{ - u8 rep_type = NUM_REP_TYPES; - - while (rep_type-- > 0) - __unload_reps_all_vport(esw, rep_type); -} - -static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep, u8 rep_type) -{ - int err = 0; - - if (atomic_cmpxchg(&rep->rep_data[rep_type].state, - REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { - err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); - if (err) - atomic_set(&rep->rep_data[rep_type].state, - REP_REGISTERED); - } - - return err; -} - -static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - struct mlx5_eswitch_rep *rep; - int err; - - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - return err; - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto err_pf; - } if (mlx5_ecpf_vport_exists(esw->dev)) { rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto err_ecpf; + __esw_offloads_unload_rep(esw, rep, rep_type); } - return 0; - -err_ecpf: if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); __esw_offloads_unload_rep(esw, rep, rep_type); } -err_pf: rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); __esw_offloads_unload_rep(esw, rep, rep_type); - return err; } -static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; - int err, i; - - mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) { - err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto err_vf; - } - - return 0; - -err_vf: - __unload_reps_vf_vport(esw, --i, rep_type); - return err; -} - -static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ + int rep_type; int err; - /* Special vports must be loaded first, uplink rep creates mdev resource. */ - err = __load_reps_special_vport(esw, rep_type); - if (err) - return err; + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; - err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); - if (err) - goto err_vfs; + rep = mlx5_eswitch_get_rep(esw, vport_num); + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, + REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); + if (err) + goto err_reps; + } return 0; -err_vfs: - __unload_reps_special_vport(esw, rep_type); - return err; -} - -static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) -{ - u8 rep_type = 0; - int err; - - for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_vf_vport(esw, nvports, rep_type); - if (err) - goto err_reps; - } - - return err; - err_reps: - while (rep_type-- > 0) - __unload_reps_vf_vport(esw, nvports, rep_type); + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); + for (--rep_type; rep_type >= 0; rep_type--) + __esw_offloads_unload_rep(esw, rep, rep_type); return err; } -static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw) +void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) { - u8 rep_type = 0; - int err; - - for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_all_vport(esw, rep_type); - if (err) - goto err_reps; - } + struct mlx5_eswitch_rep *rep; + int rep_type; - return err; + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; -err_reps: - while (rep_type-- > 0) - __unload_reps_all_vport(esw, rep_type); - return err; + rep = mlx5_eswitch_get_rep(esw, vport_num); + for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--) + __esw_offloads_unload_rep(esw, rep, rep_type); } #define ESW_OFFLOADS_DEVCOM_PAIR (0) @@ -1601,14 +1901,21 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; - static const struct mlx5_flow_spec spec = {}; struct mlx5_flow_act flow_act = {}; int err = 0; + u32 key; + + key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport); + key >>= ESW_SOURCE_PORT_METADATA_OFFSET; MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); - MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); - MLX5_SET(set_action_in, action, data, - mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, key); + MLX5_SET(set_action_in, action, offset, + ESW_SOURCE_PORT_METADATA_OFFSET); + MLX5_SET(set_action_in, action, length, + ESW_SOURCE_PORT_METADATA_BITS); vport->ingress.offloads.modify_metadata = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, @@ -1625,7 +1932,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; vport->ingress.offloads.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, - &spec, &flow_act, NULL, 0); + NULL, &flow_act, NULL, 0); if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) { err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule); esw_warn(esw->dev, @@ -1837,6 +2144,18 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) return true; } +static bool +esw_check_vport_match_metadata_mandatory(const struct mlx5_eswitch *esw) +{ + return mlx5_core_mp_enabled(esw->dev); +} + +static bool esw_use_vport_metadata(const struct mlx5_eswitch *esw) +{ + return esw_check_vport_match_metadata_mandatory(esw) && + esw_check_vport_match_metadata_supported(esw); +} + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) @@ -1875,7 +2194,7 @@ static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) struct mlx5_vport *vport; int err; - if (esw_check_vport_match_metadata_supported(esw)) + if (esw_use_vport_metadata(esw)) esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); @@ -1911,27 +2230,34 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) if (err) return err; - err = esw_create_offloads_fdb_tables(esw, total_vports); + err = esw_create_offloads_table(esw, total_vports); if (err) - goto create_fdb_err; + goto create_offloads_err; - err = esw_create_offloads_table(esw, total_vports); + err = esw_create_restore_table(esw); if (err) - goto create_ft_err; + goto create_restore_err; + + err = esw_create_offloads_fdb_tables(esw, total_vports); + if (err) + goto create_fdb_err; err = esw_create_vport_rx_group(esw, total_vports); if (err) goto create_fg_err; + mutex_init(&esw->fdb_table.offloads.vports.lock); + hash_init(esw->fdb_table.offloads.vports.table); + return 0; create_fg_err: - esw_destroy_offloads_table(esw); - -create_ft_err: esw_destroy_offloads_fdb_tables(esw); - create_fdb_err: + esw_destroy_restore_table(esw); +create_restore_err: + esw_destroy_offloads_table(esw); +create_offloads_err: esw_destroy_uplink_offloads_acl_tables(esw); return err; @@ -1939,9 +2265,11 @@ create_fdb_err: static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) { + mutex_destroy(&esw->fdb_table.offloads.vports.lock); esw_destroy_vport_rx_group(esw); - esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); + esw_destroy_restore_table(esw); + esw_destroy_offloads_table(esw); esw_destroy_uplink_offloads_acl_tables(esw); } @@ -1961,11 +2289,12 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) /* Number of VFs can only change from "0 to x" or "x to 0". */ if (esw->esw_funcs.num_vfs > 0) { - esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs); + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); } else { int err; - err = esw_offloads_load_vf_reps(esw, new_num_vfs); + err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs, + MLX5_VPORT_UC_ADDR_CHANGE); if (err) return; } @@ -2023,40 +2352,43 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) else esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); - err = esw_offloads_steering_init(esw); - if (err) - goto err_steering_init; err = esw_set_passing_vport_metadata(esw, true); if (err) goto err_vport_metadata; + err = esw_offloads_steering_init(esw); + if (err) + goto err_steering_init; + /* Representor will control the vport link state */ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; - err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); + /* Uplink vport rep must load first. */ + err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK); if (err) - goto err_vports; + goto err_uplink; - err = esw_offloads_load_all_reps(esw); + err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); if (err) - goto err_reps; + goto err_vports; esw_offloads_devcom_init(esw); - mutex_init(&esw->offloads.termtbl_mutex); return 0; -err_reps: - mlx5_eswitch_disable_pf_vf_vports(esw); err_vports: + esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); +err_uplink: esw_set_passing_vport_metadata(esw, false); -err_vport_metadata: - esw_offloads_steering_cleanup(esw); err_steering_init: + esw_offloads_steering_cleanup(esw); +err_vport_metadata: mlx5_rdma_disable_roce(esw->dev); + mutex_destroy(&esw->offloads.termtbl_mutex); return err; } @@ -2065,11 +2397,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw, false); - err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); + mlx5_eswitch_disable_locked(esw, false); + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); + err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to offloads"); @@ -2082,11 +2416,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_disable(struct mlx5_eswitch *esw) { esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_all_reps(esw); mlx5_eswitch_disable_pf_vf_vports(esw); + esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); mlx5_rdma_disable_roce(esw->dev); + mutex_destroy(&esw->offloads.termtbl_mutex); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } @@ -2166,60 +2501,82 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } -static int mlx5_devlink_eswitch_check(struct devlink *devlink) +static int mlx5_eswitch_check(const struct mlx5_core_dev *dev) { - struct mlx5_core_dev *dev = devlink_priv(devlink); - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return -EOPNOTSUPP; if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE && - !mlx5_core_is_ecpf_esw_manager(dev)) - return -EOPNOTSUPP; - return 0; } +static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw) +{ + /* devlink commands in NONE eswitch mode are currently supported only + * on ECPF. + */ + return (esw->mode == MLX5_ESWITCH_NONE && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0; +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; u16 cur_mlx5_mode, mlx5_mode = 0; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; - cur_mlx5_mode = dev->priv.eswitch->mode; - if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto unlock; + + cur_mlx5_mode = esw->mode; + if (cur_mlx5_mode == mlx5_mode) - return 0; + goto unlock; if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) - return esw_offloads_start(dev->priv.eswitch, extack); + err = esw_offloads_start(esw, extack); else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) - return esw_offloads_stop(dev->priv.eswitch, extack); + err = esw_offloads_stop(esw, extack); else - return -EINVAL; + err = -EINVAL; + +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; - return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(dev->priv.eswitch); + if (err) + goto unlock; + + err = esw_mode_to_devlink(esw->mode, mode); +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, @@ -2230,18 +2587,24 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, int err, vport, num_vport; u8 mlx5_mode; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto out; + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) - return 0; + goto out; /* fall through */ case MLX5_CAP_INLINE_MODE_L2: NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: break; } @@ -2249,7 +2612,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set inline mode when flows are configured"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; } err = esw_inline_mode_from_devlink(mode, &mlx5_mode); @@ -2266,6 +2630,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, } esw->offloads.inline_mode = mlx5_mode; + mutex_unlock(&esw->mode_lock); return 0; revert_inline_mode: @@ -2275,6 +2640,7 @@ revert_inline_mode: vport, esw->offloads.inline_mode); out: + mutex_unlock(&esw->mode_lock); return err; } @@ -2284,48 +2650,19 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; - return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); -} - -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) -{ - u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; - struct mlx5_core_dev *dev = esw->dev; - int vport; - - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; - - if (esw->mode == MLX5_ESWITCH_NONE) - return -EOPNOTSUPP; - - switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { - case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: - mlx5_mode = MLX5_INLINE_MODE_NONE; - goto out; - case MLX5_CAP_INLINE_MODE_L2: - mlx5_mode = MLX5_INLINE_MODE_L2; - goto out; - case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: - goto query_vports; - } - -query_vports: - mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); - mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { - mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); - if (prev_mlx5_mode != mlx5_mode) - return -EINVAL; - prev_mlx5_mode = mlx5_mode; - } + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto unlock; -out: - *mode = mlx5_mode; - return 0; + err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, @@ -2336,30 +2673,40 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto unlock; + if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || - !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) - return -EOPNOTSUPP; + !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) { + err = -EOPNOTSUPP; + goto unlock; + } - if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) - return -EOPNOTSUPP; + if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) { + err = -EOPNOTSUPP; + goto unlock; + } if (esw->mode == MLX5_ESWITCH_LEGACY) { esw->offloads.encap = encap; - return 0; + goto unlock; } if (esw->offloads.encap == encap) - return 0; + goto unlock; if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set encapsulation when flows are configured"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto unlock; } esw_destroy_offloads_fdb_tables(esw); @@ -2375,6 +2722,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, (void)esw_create_offloads_fdb_tables(esw, esw->nvports); } +unlock: + mutex_unlock(&esw->mode_lock); return err; } @@ -2385,14 +2734,36 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - err = mlx5_devlink_eswitch_check(devlink); + err = mlx5_eswitch_check(dev); if (err) return err; + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto unlock; + *encap = esw->offloads.encap; +unlock: + mutex_unlock(&esw->mode_lock); return 0; } +static bool +mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num) +{ + /* Currently, only ECPF based device has representor for host PF. */ + if (vport_num == MLX5_VPORT_PF && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) + return false; + + if (vport_num == MLX5_VPORT_ECPF && + !mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, const struct mlx5_eswitch_rep_ops *ops, u8 rep_type) @@ -2403,8 +2774,10 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { - rep_data = &rep->rep_data[rep_type]; - atomic_set(&rep_data->state, REP_REGISTERED); + if (likely(mlx5_eswitch_vport_has_rep(esw, i))) { + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); + } } } EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); @@ -2464,15 +2837,53 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) vport_num <= esw->dev->priv.sriov.max_vfs; } +bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED); +} +EXPORT_SYMBOL(mlx5_eswitch_reg_c1_loopback_enabled); + bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) { return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); } EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); -u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num) { - return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; + u32 vport_num_mask = GENMASK(ESW_VPORT_BITS - 1, 0); + u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0); + u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + u32 val; + + /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */ + WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS)); + + /* Trim vhca_id to ESW_VHCA_ID_BITS */ + vhca_id &= vhca_id_mask; + + /* Make sure pf and ecpf map to end of ESW_VPORT_BITS range so they + * don't overlap with VF numbers, and themselves, after trimming. + */ + WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) < + vport_num_mask - 1); + WARN_ON_ONCE((MLX5_VPORT_ECPF & vport_num_mask) < + vport_num_mask - 1); + WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) == + (MLX5_VPORT_ECPF & vport_num_mask)); + + /* Make sure that the VF vport_num fits ESW_VPORT_BITS and don't + * overlap with pf and ecpf. + */ + if (vport_num != MLX5_VPORT_UPLINK && + vport_num != MLX5_VPORT_ECPF) + WARN_ON_ONCE(vport_num >= vport_num_mask - 1); + + /* We can now trim vport_num to ESW_VPORT_BITS */ + vport_num &= vport_num_mask; + + val = (vhca_id << ESW_VPORT_BITS) | vport_num; + return val << (32 - ESW_SOURCE_PORT_METADATA_BITS); } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index dc08ed9339ab..17a0d2bc102b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -3,6 +3,7 @@ #include <linux/mlx5/fs.h> #include "eswitch.h" +#include "fs_core.h" struct mlx5_termtbl_handle { struct hlist_node termtbl_hlist; @@ -28,6 +29,10 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, sizeof(dest->vport.num), hash); hash = jhash((const void *)&dest->vport.vhca_id, sizeof(dest->vport.num), hash); + if (dest->vport.pkt_reformat) + hash = jhash(dest->vport.pkt_reformat, + sizeof(*dest->vport.pkt_reformat), + hash); return hash; } @@ -37,11 +42,19 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, struct mlx5_flow_act *flow_act2, struct mlx5_flow_destination *dest2) { - return flow_act1->action != flow_act2->action || - dest1->vport.num != dest2->vport.num || - dest1->vport.vhca_id != dest2->vport.vhca_id || - memcmp(&flow_act1->vlan, &flow_act2->vlan, - sizeof(flow_act1->vlan)); + int ret; + + ret = flow_act1->action != flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); + if (ret) + return ret; + + return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ? + memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat, + sizeof(*dest1->vport.pkt_reformat)) : 0; } static int @@ -49,7 +62,6 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, struct mlx5_termtbl_handle *tt, struct mlx5_flow_act *flow_act) { - static const struct mlx5_flow_spec spec = {}; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *root_ns; int err; @@ -63,7 +75,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, /* As this is the terminating action then the termination table is the * same prio as the slow path */ - ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION; + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; ft_attr.prio = FDB_SLOW_PATH; ft_attr.max_fte = 1; ft_attr.autogroup.max_num_groups = 1; @@ -73,9 +86,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, return -EOPNOTSUPP; } - tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act, + tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act, &tt->dest, 1); - if (IS_ERR(tt->rule)) { esw_warn(dev, "Failed to create termination table rule\n"); goto add_flow_err; @@ -93,7 +105,8 @@ add_flow_err: static struct mlx5_termtbl_handle * mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, struct mlx5_flow_act *flow_act, - struct mlx5_flow_destination *dest) + struct mlx5_flow_destination *dest, + struct mlx5_esw_flow_attr *attr) { struct mlx5_termtbl_handle *tt; bool found = false; @@ -101,7 +114,6 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, int err; mutex_lock(&esw->offloads.termtbl_mutex); - hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); hash_for_each_possible(esw->offloads.termtbl_tbl, tt, termtbl_hlist, hash_key) { @@ -123,6 +135,7 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; tt->dest.vport.num = dest->vport.num; tt->dest.vport.vhca_id = dest->vport.vhca_id; + tt->dest.vport.flags = dest->vport.flags; memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); @@ -157,31 +170,50 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, } } +static bool mlx5_eswitch_termtbl_is_encap_reformat(struct mlx5_pkt_reformat *rt) +{ + switch (rt->reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + return true; + default: + return false; + } +} + static void mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, struct mlx5_flow_act *dst) { - if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)) - return; - - src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; - dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; - memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); - memset(&src->vlan[0], 0, sizeof(src->vlan[0])); - - if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) - return; + if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); + memset(&src->vlan[0], 0, sizeof(src->vlan[0])); + + if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); + } + } - src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; - dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; - memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); - memset(&src->vlan[1], 0, sizeof(src->vlan[1])); + if (src->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT && + mlx5_eswitch_termtbl_is_encap_reformat(src->pkt_reformat)) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + dst->pkt_reformat = src->pkt_reformat; + src->pkt_reformat = NULL; + } } static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, const struct mlx5_flow_spec *spec) { - u32 port_mask, port_value; + u16 port_mask, port_value; if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) return spec->flow_context.flow_source == @@ -191,20 +223,32 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, misc_parameters.source_port); port_value = MLX5_GET(fte_match_param, spec->match_value, misc_parameters.source_port); - return (port_mask & port_value & 0xffff) == MLX5_VPORT_UPLINK; + return (port_mask & port_value) == MLX5_VPORT_UPLINK; } bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec) { - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) + int i; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || + attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH || + !mlx5_eswitch_offload_is_uplink_port(esw, spec)) return false; /* push vlan on RX */ - return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && - mlx5_eswitch_offload_is_uplink_port(esw, spec); + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) + return true; + + /* hairpin */ + for (i = attr->split_count; i < attr->out_count; i++) + if (attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) + return true; + + return false; } struct mlx5_flow_handle * @@ -234,7 +278,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, /* get the terminating table for the action list */ tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, - &dest[i]); + &dest[i], attr); if (IS_ERR(tt)) { esw_warn(esw->dev, "Failed to create termination table\n"); goto revert_changes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 4c61d25d2e88..b794888fa3ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -57,7 +57,7 @@ struct mlx5_fpga_ipsec_cmd_context { struct completion complete; struct mlx5_fpga_device *dev; struct list_head list; /* Item in pending_cmds */ - u8 command[0]; + u8 command[]; }; struct mlx5_fpga_esp_xfrm; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9dc24241dc91..62ce2b9417ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -110,9 +110,9 @@ #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) -#define OFFLOADS_MAX_FT 1 -#define OFFLOADS_NUM_PRIOS 1 -#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) +#define OFFLOADS_MAX_FT 2 +#define OFFLOADS_NUM_PRIOS 2 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS) #define LAG_PRIO_NUM_LEVELS 1 #define LAG_NUM_PRIOS 1 @@ -145,7 +145,7 @@ static struct init_tree_node { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, LAG_PRIO_NUM_LEVELS))), - ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, FS_CHAINING_CAPS, ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), @@ -1322,7 +1322,7 @@ add_rule_fte(struct fs_fte *fte, fte->node.active = true; fte->status |= FS_FTE_STATUS_EXISTING; - atomic_inc(&fte->node.version); + atomic_inc(&fg->node.version); out: return handle; @@ -1577,28 +1577,19 @@ struct match_list { struct mlx5_flow_group *g; }; -struct match_list_head { - struct list_head list; - struct match_list first; -}; - -static void free_match_list(struct match_list_head *head, bool ft_locked) +static void free_match_list(struct match_list *head, bool ft_locked) { - if (!list_empty(&head->list)) { - struct match_list *iter, *match_tmp; + struct match_list *iter, *match_tmp; - list_del(&head->first.list); - tree_put_node(&head->first.g->node, ft_locked); - list_for_each_entry_safe(iter, match_tmp, &head->list, - list) { - tree_put_node(&iter->g->node, ft_locked); - list_del(&iter->list); - kfree(iter); - } + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { + tree_put_node(&iter->g->node, ft_locked); + list_del(&iter->list); + kfree(iter); } } -static int build_match_list(struct match_list_head *match_head, +static int build_match_list(struct match_list *match_head, struct mlx5_flow_table *ft, const struct mlx5_flow_spec *spec, bool ft_locked) @@ -1615,14 +1606,8 @@ static int build_match_list(struct match_list_head *match_head, rhl_for_each_entry_rcu(g, tmp, list, hash) { struct match_list *curr_match; - if (likely(list_empty(&match_head->list))) { - if (!tree_get_node(&g->node)) - continue; - match_head->first.g = g; - list_add_tail(&match_head->first.list, - &match_head->list); + if (unlikely(!tree_get_node(&g->node))) continue; - } curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { @@ -1630,10 +1615,6 @@ static int build_match_list(struct match_list_head *match_head, err = -ENOMEM; goto out; } - if (!tree_get_node(&g->node)) { - kfree(curr_match); - continue; - } curr_match->g = g; list_add_tail(&curr_match->list, &match_head->list); } @@ -1699,7 +1680,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct match_list *iter; bool take_write = false; struct fs_fte *fte; - u64 version; + u64 version = 0; int err; fte = alloc_fte(ft, spec, flow_act); @@ -1707,10 +1688,12 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, return ERR_PTR(-ENOMEM); search_again_locked: - version = matched_fgs_get_version(match_head); if (flow_act->flags & FLOW_ACT_NO_APPEND) goto skip_search; - /* Try to find a fg that already contains a matching fte */ + version = matched_fgs_get_version(match_head); + /* Try to find an fte with identical match value and attempt update its + * action. + */ list_for_each_entry(iter, match_head, list) { struct fs_fte *fte_tmp; @@ -1738,10 +1721,12 @@ skip_search: goto out; } - /* Check the fgs version, for case the new FTE with the - * same values was added while the fgs weren't locked + /* Check the fgs version. If version have changed it could be that an + * FTE with the same match value was added while the fgs weren't + * locked. */ - if (version != matched_fgs_get_version(match_head)) { + if (!(flow_act->flags & FLOW_ACT_NO_APPEND) && + version != matched_fgs_get_version(match_head)) { take_write = true; goto search_again_locked; } @@ -1785,9 +1770,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, { struct mlx5_flow_steering *steering = get_steering(&ft->node); - struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; - struct match_list_head match_head; + struct match_list match_head; + struct mlx5_flow_group *g; bool take_write = false; struct fs_fte *fte; int version; @@ -1892,12 +1877,16 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, int num_dest) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); + static const struct mlx5_flow_spec zero_spec = {}; struct mlx5_flow_destination gen_dest = {}; struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_handle *handle = NULL; u32 sw_action = flow_act->action; struct fs_prio *prio; + if (!spec) + spec = &zero_spec; + fs_get_obj(prio, ft->node.parent); if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!fwd_next_prio_supported(ft)) @@ -2700,6 +2689,17 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) goto out_err; } + /* We put this priority last, knowing that nothing will get here + * unless explicitly forwarded to. This is possible because the + * slow path tables have catch all rules and nothing gets passed + * those tables. + */ + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_PER_VPORT, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + set_prio_attrs(steering->fdb_root_ns); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index ab69effb056d..f43caefd07a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -470,7 +470,7 @@ struct mlx5_fc_bulk { u32 base_id; int bulk_len; unsigned long *bitmask; - struct mlx5_fc fcs[0]; + struct mlx5_fc fcs[]; }; static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 909a7f284614..90e3d0233101 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -613,6 +613,44 @@ static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) fwhandle, 0); } +#define MLX5_FSM_REACTIVATE_TOUT 5000 /* msecs */ +static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5_FSM_REACTIVATE_TOUT); + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u32 out[MLX5_ST_SZ_DW(mirc_reg)]; + u32 in[MLX5_ST_SZ_DW(mirc_reg)]; + int err; + + if (!MLX5_CAP_MCAM_REG2(dev, mirc)) + return -EOPNOTSUPP; + + memset(in, 0, sizeof(in)); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MIRC, 0, 1); + if (err) + return err; + + do { + memset(out, 0, sizeof(out)); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MIRC, 0, 0); + if (err) + return err; + + *status = MLX5_GET(mirc_reg, out, status_code); + if (*status != MLXFW_FSM_REACTIVATE_STATUS_BUSY) + return 0; + + msleep(20); + } while (time_before(jiffies, exp_time)); + + return 0; +} + static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { .component_query = mlx5_component_query, .fsm_lock = mlx5_fsm_lock, @@ -620,6 +658,7 @@ static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { .fsm_block_download = mlx5_fsm_block_download, .fsm_component_verify = mlx5_fsm_component_verify, .fsm_activate = mlx5_fsm_activate, + .fsm_reactivate = mlx5_fsm_reactivate, .fsm_query_state = mlx5_fsm_query_state, .fsm_cancel = mlx5_fsm_cancel, .fsm_release = mlx5_fsm_release @@ -634,6 +673,7 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, .ops = &mlx5_mlxfw_dev_ops, .psid = dev->board_id, .psid_size = strlen(dev->board_id), + .devlink = priv_to_devlink(dev), }, .mlx5_core_dev = dev }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d9f4e8c59c1f..fa1665caac46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -627,7 +627,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) health->fw_reporter = devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, - 0, false, dev); + 0, dev); if (IS_ERR(health->fw_reporter)) mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", PTR_ERR(health->fw_reporter)); @@ -636,7 +636,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) devlink_health_reporter_create(devlink, &mlx5_fw_fatal_reporter_ops, MLX5_REPORTER_FW_GRACEFUL_PERIOD, - true, dev); + dev); if (IS_ERR(health->fw_fatal_reporter)) mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", PTR_ERR(health->fw_fatal_reporter)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 90cb50fe17fd..1eef66ee849e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -235,6 +235,9 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev, } const struct ethtool_ops mlx5i_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = mlx5i_get_drvinfo, .get_strings = mlx5i_get_strings, .get_sset_count = mlx5i_get_sset_count, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 56078b23f1a0..673aaa815f57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -483,7 +483,7 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index c87962cab921..de7e01a027bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -56,7 +56,7 @@ struct mlx5i_priv { u32 qkey; u16 pkey_index; struct mlx5i_pkey_qpn_ht *qpn_htbl; - char *mlx5e_priv[0]; + char *mlx5e_priv[]; }; int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn); @@ -107,7 +107,7 @@ struct mlx5i_tx_wqe { struct mlx5_wqe_datagram_seg datagram; struct mlx5_wqe_eth_pad pad; struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + struct mlx5_wqe_data_seg data[]; }; static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 416676c35b1f..e9089a793632 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -93,9 +93,8 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, static void mlx5_lag_fib_event_flush(struct notifier_block *nb) { struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); - struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp); - flush_workqueue(ldev->wq); + flush_workqueue(mp->wq); } struct mlx5_fib_event_work { @@ -293,7 +292,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, return NOTIFY_DONE; } - queue_work(ldev->wq, &fib_work->work); + queue_work(mp->wq, &fib_work->work); return NOTIFY_DONE; } @@ -306,11 +305,17 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) if (mp->fib_nb.notifier_call) return 0; + mp->wq = create_singlethread_workqueue("mlx5_lag_mp"); + if (!mp->wq) + return -ENOMEM; + mp->fib_nb.notifier_call = mlx5_lag_fib_event; err = register_fib_notifier(&init_net, &mp->fib_nb, mlx5_lag_fib_event_flush, NULL); - if (err) + if (err) { + destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; + } return err; } @@ -323,5 +328,6 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) return; unregister_fib_notifier(&init_net, &mp->fib_nb); + destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h index 79be89e9c7a4..258ac7b2964e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h @@ -16,6 +16,7 @@ enum mlx5_lag_port_affinity { struct lag_mp { struct notifier_block fib_nb; struct fib_info *mfi; /* used in tracking fib events */ + struct workqueue_struct *wq; }; #ifdef CONFIG_MLX5_ESWITCH diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c index 3fc575d1c3ec..dcea87ec5977 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -42,7 +42,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size); MLX5_SET(encryption_key_obj, obj, key_type, - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK); + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS); MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c index e065c2f68f5a..6cbccba56f70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -21,7 +21,7 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) struct mlx5_dm *dm; if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)) - return 0; + return NULL; dm = kzalloc(sizeof(*dm), GFP_KERNEL); if (!dm) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f554cfddcf4e..7af4210c1b96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -70,6 +70,7 @@ #include "diag/fw_tracer.h" #include "ecpf.h" #include "lib/hv_vhca.h" +#include "diag/rsc_dump.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -880,6 +881,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) dev->tracer = mlx5_fw_tracer_create(dev); dev->hv_vhca = mlx5_hv_vhca_create(dev); + dev->rsc_dump = mlx5_rsc_dump_create(dev); return 0; @@ -909,6 +911,7 @@ err_devcom: static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { + mlx5_rsc_dump_destroy(dev); mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); mlx5_dm_cleanup(dev); @@ -1079,6 +1082,12 @@ static int mlx5_load(struct mlx5_core_dev *dev) mlx5_hv_vhca_init(dev->hv_vhca); + err = mlx5_rsc_dump_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init Resource dump\n"); + goto err_rsc_dump; + } + err = mlx5_fpga_device_start(dev); if (err) { mlx5_core_err(dev, "fpga device start failed %d\n", err); @@ -1134,6 +1143,8 @@ err_tls_start: err_ipsec_start: mlx5_fpga_device_stop(dev); err_fpga_start: + mlx5_rsc_dump_cleanup(dev); +err_rsc_dump: mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: @@ -1155,6 +1166,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); + mlx5_rsc_dump_cleanup(dev); mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); @@ -1199,15 +1211,10 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) goto err_devlink_reg; } - if (mlx5_device_registered(dev)) { + if (mlx5_device_registered(dev)) mlx5_attach_device(dev); - } else { - err = mlx5_register_device(dev); - if (err) { - mlx5_core_err(dev, "register device failed %d\n", err); - goto err_reg_dev; - } - } + else + mlx5_register_device(dev); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: @@ -1215,9 +1222,6 @@ out: return err; -err_reg_dev: - if (boot) - mlx5_devlink_unregister(priv_to_devlink(dev)); err_devlink_reg: mlx5_unload(dev); err_load: @@ -1231,7 +1235,7 @@ function_teardown: return err; } -int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) +void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { if (cleanup) { mlx5_unregister_device(dev); @@ -1260,7 +1264,6 @@ int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) mlx5_function_teardown(dev, cleanup); out: mutex_unlock(&dev->intf_state_mutex); - return 0; } static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) @@ -1282,7 +1285,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mutex_init(&priv->alloc_mutex); mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->pgdir_list); - spin_lock_init(&priv->mkey_lock); priv->dbg_root = debugfs_create_dir(dev_name(dev->device), mlx5_debugfs_root); @@ -1381,12 +1383,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_crdump_disable(dev); mlx5_devlink_unregister(devlink); - if (mlx5_unload_one(dev, true)) { - mlx5_core_err(dev, "mlx5_unload_one failed\n"); - mlx5_health_flush(dev); - return; - } - + mlx5_unload_one(dev, true); mlx5_pci_close(dev); mlx5_mdev_uninit(dev); mlx5_devlink_free(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index da67b28d6e23..a8fb43a85d1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -182,7 +182,7 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_attach_device(struct mlx5_core_dev *dev); void mlx5_detach_device(struct mlx5_core_dev *dev); bool mlx5_device_registered(struct mlx5_core_dev *dev); -int mlx5_register_device(struct mlx5_core_dev *dev); +void mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); @@ -244,6 +244,6 @@ enum { u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); -int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); +void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 42cc3c7ac5b6..366f2cbfc6db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,54 +36,31 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_async_ctx *async_ctx, u32 *in, - int inlen, u32 *out, int outlen, - mlx5_async_cbk_t callback, - struct mlx5_async_work *context) +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen) { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; u32 mkey_index; void *mkc; int err; - u8 key; - - spin_lock_irq(&dev->priv.mkey_lock); - key = dev->priv.mkey_key++; - spin_unlock_irq(&dev->priv.mkey_lock); - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); - MLX5_SET(mkc, mkc, mkey_7_0, key); - - if (callback) - return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, - callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); if (err) return err; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); - mkey->key = mlx5_idx_to_mkey(mkey_index) | key; + mkey->key |= mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); - mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", - mkey_index, key, mkey->key); + mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key); return 0; } -EXPORT_SYMBOL(mlx5_core_create_mkey_cb); - -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - u32 *in, int inlen) -{ - return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, - NULL, 0, NULL, NULL); -} EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index 01c380425f9d..f3b29d9ade1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -101,22 +101,39 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +static bool mlx5_rl_are_equal_raw(struct mlx5_rl_entry *entry, void *rl_in, + u16 uid) +{ + return (!memcmp(entry->rl_raw, rl_in, sizeof(entry->rl_raw)) && + entry->uid == uid); +} + /* Finds an entry where we can register the given rate * If the rate already exists, return the entry where it is registered, * otherwise return the first available entry. * If the table is full, return NULL */ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, - struct mlx5_rate_limit *rl) + void *rl_in, u16 uid, bool dedicated) { struct mlx5_rl_entry *ret_entry = NULL; bool empty_found = false; int i; for (i = 0; i < table->max_size; i++) { - if (mlx5_rl_are_equal(&table->rl_entry[i].rl, rl)) - return &table->rl_entry[i]; - if (!empty_found && !table->rl_entry[i].rl.rate) { + if (dedicated) { + if (!table->rl_entry[i].refcount) + return &table->rl_entry[i]; + continue; + } + + if (table->rl_entry[i].refcount) { + if (table->rl_entry[i].dedicated) + continue; + if (mlx5_rl_are_equal_raw(&table->rl_entry[i], rl_in, + uid)) + return &table->rl_entry[i]; + } else if (!empty_found) { empty_found = true; ret_entry = &table->rl_entry[i]; } @@ -126,18 +143,19 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, } static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, - u16 index, - struct mlx5_rate_limit *rl) + struct mlx5_rl_entry *entry, bool set) { - u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {}; + void *pp_context; + pp_context = MLX5_ADDR_OF(set_pp_rate_limit_in, in, ctx); MLX5_SET(set_pp_rate_limit_in, in, opcode, MLX5_CMD_OP_SET_PP_RATE_LIMIT); - MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index); - MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rl->rate); - MLX5_SET(set_pp_rate_limit_in, in, burst_upper_bound, rl->max_burst_sz); - MLX5_SET(set_pp_rate_limit_in, in, typical_packet_size, rl->typical_pkt_sz); + MLX5_SET(set_pp_rate_limit_in, in, uid, entry->uid); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, entry->index); + if (set) + memcpy(pp_context, entry->rl_raw, sizeof(entry->rl_raw)); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -158,23 +176,25 @@ bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, } EXPORT_SYMBOL(mlx5_rl_are_equal); -int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, - struct mlx5_rate_limit *rl) +int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid, + bool dedicated_entry, u16 *index) { struct mlx5_rl_table *table = &dev->priv.rl_table; struct mlx5_rl_entry *entry; int err = 0; + u32 rate; + rate = MLX5_GET(set_pp_rate_limit_context, rl_in, rate_limit); mutex_lock(&table->rl_lock); - if (!rl->rate || !mlx5_rl_is_in_range(dev, rl->rate)) { + if (!rate || !mlx5_rl_is_in_range(dev, rate)) { mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n", - rl->rate, table->min_rate, table->max_rate); + rate, table->min_rate, table->max_rate); err = -EINVAL; goto out; } - entry = find_rl_entry(table, rl); + entry = find_rl_entry(table, rl_in, uid, dedicated_entry); if (!entry) { mlx5_core_err(dev, "Max number of %u rates reached\n", table->max_size); @@ -185,16 +205,24 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, /* rate already configured */ entry->refcount++; } else { + memcpy(entry->rl_raw, rl_in, sizeof(entry->rl_raw)); + entry->uid = uid; /* new rate limit */ - err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl); + err = mlx5_set_pp_rate_limit_cmd(dev, entry, true); if (err) { - mlx5_core_err(dev, "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n", - err, rl->rate, rl->max_burst_sz, - rl->typical_pkt_sz); + mlx5_core_err( + dev, + "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n", + err, rate, + MLX5_GET(set_pp_rate_limit_context, rl_in, + burst_upper_bound), + MLX5_GET(set_pp_rate_limit_context, rl_in, + typical_packet_size)); goto out; } - entry->rl = *rl; + entry->refcount = 1; + entry->dedicated = dedicated_entry; } *index = entry->index; @@ -202,20 +230,61 @@ out: mutex_unlock(&table->rl_lock); return err; } +EXPORT_SYMBOL(mlx5_rl_add_rate_raw); + +void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry; + + mutex_lock(&table->rl_lock); + entry = &table->rl_entry[index - 1]; + entry->refcount--; + if (!entry->refcount) + /* need to remove rate */ + mlx5_set_pp_rate_limit_cmd(dev, entry, false); + mutex_unlock(&table->rl_lock); +} +EXPORT_SYMBOL(mlx5_rl_remove_rate_raw); + +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, + struct mlx5_rate_limit *rl) +{ + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {}; + + MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate); + MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound, + rl->max_burst_sz); + MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size, + rl->typical_pkt_sz); + + return mlx5_rl_add_rate_raw(dev, rl_raw, + MLX5_CAP_QOS(dev, packet_pacing_uid) ? + MLX5_SHARED_RESOURCE_UID : 0, + false, index); +} EXPORT_SYMBOL(mlx5_rl_add_rate); void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl) { + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {}; struct mlx5_rl_table *table = &dev->priv.rl_table; struct mlx5_rl_entry *entry = NULL; - struct mlx5_rate_limit reset_rl = {0}; /* 0 is a reserved value for unlimited rate */ if (rl->rate == 0) return; + MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate); + MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound, + rl->max_burst_sz); + MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size, + rl->typical_pkt_sz); + mutex_lock(&table->rl_lock); - entry = find_rl_entry(table, rl); + entry = find_rl_entry(table, rl_raw, + MLX5_CAP_QOS(dev, packet_pacing_uid) ? + MLX5_SHARED_RESOURCE_UID : 0, false); if (!entry || !entry->refcount) { mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u are not configured\n", rl->rate, rl->max_burst_sz, rl->typical_pkt_sz); @@ -223,11 +292,9 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl) } entry->refcount--; - if (!entry->refcount) { + if (!entry->refcount) /* need to remove rate */ - mlx5_set_pp_rate_limit_cmd(dev, entry->index, &reset_rl); - entry->rl = reset_rl; - } + mlx5_set_pp_rate_limit_cmd(dev, entry, false); out: mutex_unlock(&table->rl_lock); @@ -273,14 +340,13 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev) void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) { struct mlx5_rl_table *table = &dev->priv.rl_table; - struct mlx5_rate_limit rl = {0}; int i; /* Clear all configured rates */ for (i = 0; i < table->max_size; i++) - if (table->rl_entry[i].rl.rate) - mlx5_set_pp_rate_limit_cmd(dev, table->rl_entry[i].index, - &rl); + if (table->rl_entry[i].refcount) + mlx5_set_pp_rate_limit_cmd(dev, &table->rl_entry[i], + false); kfree(dev->priv.rl_table.rl_entry); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 03f037811f1d..3094d20297a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -77,8 +77,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; - mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs); - err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY); + err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 2d93228ff633..554811de4c9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -672,7 +672,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, dest_action = action; if (!action->dest_tbl.is_fw_tbl) { if (action->dest_tbl.tbl->dmn != dmn) { - mlx5dr_dbg(dmn, + mlx5dr_err(dmn, "Destination table belongs to a different domain\n"); goto out_invalid_arg; } @@ -703,7 +703,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, action->dest_tbl.fw_tbl.rx_icm_addr = output.sw_owner_icm_root_0; } else { - mlx5dr_dbg(dmn, + mlx5dr_err(dmn, "Failed mlx5_cmd_query_flow_table ret: %d\n", ret); return ret; @@ -772,7 +772,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, /* Check action duplication */ if (++action_type_set[action_type] > max_actions_type) { - mlx5dr_dbg(dmn, "Action type %d supports only max %d time(s)\n", + mlx5dr_err(dmn, "Action type %d supports only max %d time(s)\n", action_type, max_actions_type); goto out_invalid_arg; } @@ -781,7 +781,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, if (dr_action_validate_and_get_next_state(action_domain, action_type, &state)) { - mlx5dr_dbg(dmn, "Invalid action sequence provided\n"); + mlx5dr_err(dmn, "Invalid action sequence provided\n"); return -EOPNOTSUPP; } } @@ -797,7 +797,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, rx_rule && recalc_cs_required && dest_action) { ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr); if (ret) { - mlx5dr_dbg(dmn, + mlx5dr_err(dmn, "Failed to handle checksum recalculation err %d\n", ret); return ret; @@ -964,6 +964,24 @@ struct mlx5dr_action *mlx5dr_action_create_drop(void) } struct mlx5dr_action * +mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + return NULL; + + action->dest_tbl.is_fw_tbl = true; + action->dest_tbl.fw_tbl.dmn = dmn; + action->dest_tbl.fw_tbl.id = table_num; + action->dest_tbl.fw_tbl.type = FS_FT_FDB; + refcount_inc(&dmn->refcount); + + return action; +} + +struct mlx5dr_action * mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl) { struct mlx5dr_action *action; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index a9da961d4d2f..48b6358b6845 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -59,7 +59,7 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn); if (ret) { - mlx5dr_dbg(dmn, "Couldn't allocate PD\n"); + mlx5dr_err(dmn, "Couldn't allocate PD, ret: %d", ret); return ret; } @@ -192,7 +192,7 @@ static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, ret = dr_domain_query_vports(dmn); if (ret) { - mlx5dr_dbg(dmn, "Failed to query vports caps\n"); + mlx5dr_err(dmn, "Failed to query vports caps (err: %d)", ret); goto free_vports_caps; } @@ -213,7 +213,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, int ret; if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) { - mlx5dr_dbg(dmn, "Failed to allocate domain, bad link type\n"); + mlx5dr_err(dmn, "Failed to allocate domain, bad link type\n"); return -EOPNOTSUPP; } @@ -257,7 +257,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX; vport_cap = mlx5dr_get_vport_cap(&dmn->info.caps, 0); if (!vport_cap) { - mlx5dr_dbg(dmn, "Failed to get esw manager vport\n"); + mlx5dr_err(dmn, "Failed to get esw manager vport\n"); return -ENOENT; } @@ -268,7 +268,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address; break; default: - mlx5dr_dbg(dmn, "Invalid domain\n"); + mlx5dr_err(dmn, "Invalid domain\n"); ret = -EINVAL; break; } @@ -300,7 +300,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) mutex_init(&dmn->mutex); if (dr_domain_caps_init(mdev, dmn)) { - mlx5dr_dbg(dmn, "Failed init domain, no caps\n"); + mlx5dr_err(dmn, "Failed init domain, no caps\n"); goto free_domain; } @@ -348,8 +348,11 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) mutex_lock(&dmn->mutex); ret = mlx5dr_send_ring_force_drain(dmn); mutex_unlock(&dmn->mutex); - if (ret) + if (ret) { + mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n", + flags, ret); return ret; + } } if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index d7c7467e2d53..30d2d7376f56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -468,7 +468,7 @@ mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); if (err) { dr_icm_chill_buckets_abort(pool, bucket, buckets); - mlx5dr_dbg(pool->dmn, "Sync_steering failed\n"); + mlx5dr_err(pool->dmn, "Sync_steering failed\n"); chunk = NULL; goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index c6dbd856df94..a95938874798 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -388,14 +388,14 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx); if (idx == 0) { - mlx5dr_dbg(dmn, "Cannot generate any valid rules from mask\n"); + mlx5dr_err(dmn, "Cannot generate any valid rules from mask\n"); return -EINVAL; } /* Check that all mask fields were consumed */ for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) { if (((u8 *)&mask)[i] != 0) { - mlx5dr_info(dmn, "Mask contains unsupported parameters\n"); + mlx5dr_err(dmn, "Mask contains unsupported parameters\n"); return -EOPNOTSUPP; } } @@ -563,7 +563,7 @@ static int dr_matcher_set_all_ste_builders(struct mlx5dr_matcher *matcher, dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV6); if (!nic_matcher->ste_builder) { - mlx5dr_dbg(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n"); + mlx5dr_err(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n"); return -EINVAL; } @@ -634,13 +634,13 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher, int ret; if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) { - mlx5dr_info(dmn, "Invalid match criteria attribute\n"); + mlx5dr_err(dmn, "Invalid match criteria attribute\n"); return -EINVAL; } if (mask) { if (mask->match_sz > sizeof(struct mlx5dr_match_param)) { - mlx5dr_info(dmn, "Invalid match size attribute\n"); + mlx5dr_err(dmn, "Invalid match size attribute\n"); return -EINVAL; } mlx5dr_ste_copy_param(matcher->match_criteria, @@ -671,7 +671,7 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher * mlx5dr_matcher_create(struct mlx5dr_table *tbl, - u16 priority, + u32 priority, u8 match_criteria_enable, struct mlx5dr_match_parameters *mask) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index e4cff7abb348..cce3ee7a6614 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -826,8 +826,8 @@ again: ste_location, send_ste_list); if (!new_htbl) { mlx5dr_htbl_put(cur_htbl); - mlx5dr_info(dmn, "failed creating rehash table, htbl-log_size: %d\n", - cur_htbl->chunk_size); + mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n", + cur_htbl->chunk_size); } else { cur_htbl = new_htbl; } @@ -877,7 +877,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, if (!value_size || (value_size > sizeof(struct mlx5dr_match_param) || (value_size % sizeof(u32)))) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule parameters length is incorrect\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule parameters length is incorrect\n"); return false; } @@ -888,7 +888,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, e_idx = min(s_idx + sizeof(param->outer), value_size); if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n"); return false; } } @@ -898,7 +898,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, e_idx = min(s_idx + sizeof(param->misc), value_size); if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n"); return false; } } @@ -908,7 +908,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, e_idx = min(s_idx + sizeof(param->inner), value_size); if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n"); return false; } } @@ -918,7 +918,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, e_idx = min(s_idx + sizeof(param->misc2), value_size); if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n"); return false; } } @@ -928,7 +928,7 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, e_idx = min(s_idx + sizeof(param->misc3), value_size); if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { - mlx5dr_dbg(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n"); + mlx5dr_err(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n"); return false; } } @@ -1221,7 +1221,7 @@ remove_action_members: dr_rule_remove_action_members(rule); free_rule: kfree(rule); - mlx5dr_info(dmn, "Failed creating rule\n"); + mlx5dr_err(dmn, "Failed creating rule\n"); return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 095ec7b1399d..c0ab9cf74929 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -136,7 +136,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq, &dr_qp->wq_ctrl); if (err) { - mlx5_core_info(mdev, "Can't create QP WQ\n"); + mlx5_core_warn(mdev, "Can't create QP WQ\n"); goto err_wq; } @@ -652,8 +652,10 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) /* Init */ ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port); - if (ret) + if (ret) { + mlx5dr_err(dmn, "Failed modify QP rst2init\n"); return ret; + } /* RTR */ ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr); @@ -668,8 +670,10 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); - if (ret) + if (ret) { + mlx5dr_err(dmn, "Failed modify QP init2rtr\n"); return ret; + } /* RTS */ rts_attr.timeout = 14; @@ -677,8 +681,10 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) rts_attr.rnr_retry = 7; ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr); - if (ret) + if (ret) { + mlx5dr_err(dmn, "Failed modify QP rtr2rts\n"); return ret; + } return 0; } @@ -862,6 +868,7 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) cq_size = QUEUE_SIZE + 1; dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size); if (!dmn->send_ring->cq) { + mlx5dr_err(dmn, "Failed creating CQ\n"); ret = -ENOMEM; goto free_send_ring; } @@ -873,6 +880,7 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); if (!dmn->send_ring->qp) { + mlx5dr_err(dmn, "Failed creating QP\n"); ret = -ENOMEM; goto clean_cq; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index aade62a9ee5c..c0e3a1e7389d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -728,7 +728,7 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, { if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) { if (mask->misc.source_port && mask->misc.source_port != 0xffff) { - mlx5dr_dbg(dmn, "Partial mask source_port is not supported\n"); + mlx5dr_err(dmn, "Partial mask source_port is not supported\n"); return -EINVAL; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index 14ce2d7dbb66..c2fe48d7b75a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -128,16 +128,20 @@ static int dr_table_init_nic(struct mlx5dr_domain *dmn, DR_CHUNK_SIZE_1, MLX5DR_STE_LU_TYPE_DONT_CARE, 0); - if (!nic_tbl->s_anchor) + if (!nic_tbl->s_anchor) { + mlx5dr_err(dmn, "Failed allocating htbl\n"); return -ENOMEM; + } info.type = CONNECT_MISS; info.miss_icm_addr = nic_dmn->default_icm_addr; ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, nic_tbl->s_anchor, &info, true); - if (ret) + if (ret) { + mlx5dr_err(dmn, "Failed int and send htbl\n"); goto free_s_anchor; + } mlx5dr_htbl_get(nic_tbl->s_anchor); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index dffe35145d19..3fa739951b34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -705,7 +705,7 @@ struct mlx5dr_matcher { struct mlx5dr_matcher_rx_tx rx; struct mlx5dr_matcher_rx_tx tx; struct list_head matcher_list; - u16 prio; + u32 prio; struct mlx5dr_match_param mask; u8 match_criteria; refcount_t refcount; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index c2027192e21e..3b3f5b9d4f95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -140,7 +140,7 @@ static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_group *fg) { struct mlx5dr_matcher *matcher; - u16 priority = MLX5_GET(create_flow_group_in, in, + u32 priority = MLX5_GET(create_flow_group_in, in, start_flow_index); u8 match_criteria_enable = MLX5_GET(create_flow_group_in, in, @@ -384,6 +384,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { list_for_each_entry(dst, &fte->node.children, node.list) { enum mlx5_flow_destination_type type = dst->dest_attr.type; + u32 ft_id; if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) { @@ -420,6 +421,17 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, num_term_actions++; break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + ft_id = dst->dest_attr.ft_num; + tmp_action = mlx5dr_action_create_dest_table_num(domain, + ft_id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; default: err = -EOPNOTSUPP; goto free_actions; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index e1edc9c247b7..7deaca9ade3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -38,8 +38,6 @@ struct mlx5dr_action_dest { struct mlx5dr_action *reformat; }; -#ifdef CONFIG_MLX5_SW_STEERING - struct mlx5dr_domain * mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); @@ -59,7 +57,7 @@ u32 mlx5dr_table_get_id(struct mlx5dr_table *table); struct mlx5dr_matcher * mlx5dr_matcher_create(struct mlx5dr_table *table, - u16 priority, + u32 priority, u8 match_criteria_enable, struct mlx5dr_match_parameters *mask); @@ -77,6 +75,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, struct mlx5dr_action *action); struct mlx5dr_action * +mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num); + +struct mlx5dr_action * mlx5dr_action_create_dest_table(struct mlx5dr_table *table); struct mlx5dr_action * @@ -125,103 +126,4 @@ mlx5dr_is_supported(struct mlx5_core_dev *dev) return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner); } -#else /* CONFIG_MLX5_SW_STEERING */ - -static inline struct mlx5dr_domain * -mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) { return NULL; } - -static inline int -mlx5dr_domain_destroy(struct mlx5dr_domain *domain) { return 0; } - -static inline int -mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags) { return 0; } - -static inline void -mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, - struct mlx5dr_domain *peer_dmn) { } - -static inline struct mlx5dr_table * -mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags) { return NULL; } - -static inline int -mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; } - -static inline u32 -mlx5dr_table_get_id(struct mlx5dr_table *table) { return 0; } - -static inline struct mlx5dr_matcher * -mlx5dr_matcher_create(struct mlx5dr_table *table, - u16 priority, - u8 match_criteria_enable, - struct mlx5dr_match_parameters *mask) { return NULL; } - -static inline int -mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { return 0; } - -static inline struct mlx5dr_rule * -mlx5dr_rule_create(struct mlx5dr_matcher *matcher, - struct mlx5dr_match_parameters *value, - size_t num_actions, - struct mlx5dr_action *actions[]) { return NULL; } - -static inline int -mlx5dr_rule_destroy(struct mlx5dr_rule *rule) { return 0; } - -static inline int -mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, - struct mlx5dr_action *action) { return 0; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain, - struct mlx5_flow_table *ft) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, - u32 vport, u8 vhca_id_valid, - u16 vhca_id) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, - struct mlx5dr_action_dest *dests, - u32 num_of_dests) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_drop(void) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_tag(u32 tag_value) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_flow_counter(u32 counter_id) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, - enum mlx5dr_action_reformat_type reformat_type, - size_t data_sz, - void *data) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, - u32 flags, - size_t actions_sz, - __be64 actions[]) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_pop_vlan(void) { return NULL; } - -static inline struct mlx5dr_action * -mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, - __be32 vlan_hdr) { return NULL; } - -static inline int -mlx5dr_action_destroy(struct mlx5dr_action *action) { return 0; } - -static inline bool -mlx5dr_is_supported(struct mlx5_core_dev *dev) { return false; } - -#endif /* CONFIG_MLX5_SW_STEERING */ - #endif /* _MLX5DR_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlxfw/Kconfig b/drivers/net/ethernet/mellanox/mlxfw/Kconfig index 0367f835a846..5b604501f33e 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxfw/Kconfig @@ -12,3 +12,4 @@ config MLXFW To compile this driver as a module, choose M here: the module will be called mlxfw. select XZ_DEC + select NET_DEVLINK diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h index c50e74ab02c4..7654841a05c2 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -6,6 +6,30 @@ #include <linux/firmware.h> #include <linux/netlink.h> +#include <linux/device.h> +#include <net/devlink.h> + +struct mlxfw_dev { + const struct mlxfw_dev_ops *ops; + const char *psid; + u16 psid_size; + struct devlink *devlink; +}; + +static inline +struct device *mlxfw_dev_dev(struct mlxfw_dev *mlxfw_dev) +{ + return mlxfw_dev->devlink->dev; +} + +#define MLXFW_PRFX "mlxfw: " + +#define mlxfw_info(mlxfw_dev, fmt, ...) \ + dev_info(mlxfw_dev_dev(mlxfw_dev), MLXFW_PRFX fmt, ## __VA_ARGS__) +#define mlxfw_err(mlxfw_dev, fmt, ...) \ + dev_err(mlxfw_dev_dev(mlxfw_dev), MLXFW_PRFX fmt, ## __VA_ARGS__) +#define mlxfw_dbg(mlxfw_dev, fmt, ...) \ + dev_dbg(mlxfw_dev_dev(mlxfw_dev), MLXFW_PRFX fmt, ## __VA_ARGS__) enum mlxfw_fsm_state { MLXFW_FSM_STATE_IDLE, @@ -31,7 +55,19 @@ enum mlxfw_fsm_state_err { MLXFW_FSM_STATE_ERR_MAX, }; -struct mlxfw_dev; +enum mlxfw_fsm_reactivate_status { + MLXFW_FSM_REACTIVATE_STATUS_OK, + MLXFW_FSM_REACTIVATE_STATUS_BUSY, + MLXFW_FSM_REACTIVATE_STATUS_PROHIBITED_FW_VER_ERR, + MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_COPY_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_ERASE_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_RESTORE_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_CANDIDATE_FW_DEACTIVATION_FAILED, + MLXFW_FSM_REACTIVATE_STATUS_FW_ALREADY_ACTIVATED, + MLXFW_FSM_REACTIVATE_STATUS_ERR_DEVICE_RESET_REQUIRED, + MLXFW_FSM_REACTIVATE_STATUS_ERR_FW_PROGRAMMING_NEEDED, + MLXFW_FSM_REACTIVATE_STATUS_MAX, +}; struct mlxfw_dev_ops { int (*component_query)(struct mlxfw_dev *mlxfw_dev, u16 component_index, @@ -51,6 +87,8 @@ struct mlxfw_dev_ops { int (*fsm_activate)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); + int (*fsm_reactivate)(struct mlxfw_dev *mlxfw_dev, u8 *status); + int (*fsm_query_state)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, enum mlxfw_fsm_state *fsm_state, enum mlxfw_fsm_state_err *fsm_state_err); @@ -58,16 +96,6 @@ struct mlxfw_dev_ops { void (*fsm_cancel)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); void (*fsm_release)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); - - void (*status_notify)(struct mlxfw_dev *mlxfw_dev, - const char *msg, const char *comp_name, - u32 done_bytes, u32 total_bytes); -}; - -struct mlxfw_dev { - const struct mlxfw_dev_ops *ops; - const char *psid; - u16 psid_size; }; #if IS_REACHABLE(CONFIG_MLXFW) diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 29e95d0a6ad1..046a0cb82ed8 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -16,38 +16,70 @@ (MLXFW_FSM_STATE_WAIT_TIMEOUT_MS / MLXFW_FSM_STATE_WAIT_CYCLE_MS) #define MLXFW_FSM_MAX_COMPONENT_SIZE (10 * (1 << 20)) -static const char * const mlxfw_fsm_state_err_str[] = { - [MLXFW_FSM_STATE_ERR_ERROR] = - "general error", - [MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR] = - "component hash mismatch", - [MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE] = - "component not applicable", - [MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY] = - "unknown key", - [MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED] = - "authentication failed", - [MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED] = - "component was not signed", - [MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE] = - "key not applicable", - [MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT] = - "bad format", - [MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET] = - "pending reset", - [MLXFW_FSM_STATE_ERR_MAX] = - "unknown error" +static const int mlxfw_fsm_state_errno[] = { + [MLXFW_FSM_STATE_ERR_ERROR] = -EIO, + [MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR] = -EBADMSG, + [MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE] = -ENOENT, + [MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY] = -ENOKEY, + [MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED] = -EACCES, + [MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED] = -EKEYREVOKED, + [MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE] = -EKEYREJECTED, + [MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT] = -ENOEXEC, + [MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET] = -EBUSY, + [MLXFW_FSM_STATE_ERR_MAX] = -EINVAL }; -static void mlxfw_status_notify(struct mlxfw_dev *mlxfw_dev, - const char *msg, const char *comp_name, - u32 done_bytes, u32 total_bytes) +#define MLXFW_ERR_PRFX "Firmware flash failed: " +#define MLXFW_ERR_MSG(fwdev, extack, msg, err) do { \ + mlxfw_err(fwdev, "%s, err (%d)\n", MLXFW_ERR_PRFX msg, err); \ + NL_SET_ERR_MSG_MOD(extack, MLXFW_ERR_PRFX msg); \ +} while (0) + +static int mlxfw_fsm_state_err(struct mlxfw_dev *mlxfw_dev, + struct netlink_ext_ack *extack, + enum mlxfw_fsm_state_err err) { - if (!mlxfw_dev->ops->status_notify) - return; - mlxfw_dev->ops->status_notify(mlxfw_dev, msg, comp_name, - done_bytes, total_bytes); -} + enum mlxfw_fsm_state_err fsm_state_err; + + fsm_state_err = min_t(enum mlxfw_fsm_state_err, err, + MLXFW_FSM_STATE_ERR_MAX); + + switch (fsm_state_err) { + case MLXFW_FSM_STATE_ERR_ERROR: + MLXFW_ERR_MSG(mlxfw_dev, extack, "general error", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR: + MLXFW_ERR_MSG(mlxfw_dev, extack, "component hash mismatch", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE: + MLXFW_ERR_MSG(mlxfw_dev, extack, "component not applicable", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY: + MLXFW_ERR_MSG(mlxfw_dev, extack, "unknown key", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED: + MLXFW_ERR_MSG(mlxfw_dev, extack, "authentication failed", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED: + MLXFW_ERR_MSG(mlxfw_dev, extack, "component was not signed", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE: + MLXFW_ERR_MSG(mlxfw_dev, extack, "key not applicable", err); + break; + case MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT: + MLXFW_ERR_MSG(mlxfw_dev, extack, "bad format", err); + break; + case MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET: + MLXFW_ERR_MSG(mlxfw_dev, extack, "pending reset", err); + break; + case MLXFW_FSM_STATE_ERR_OK: /* fall through */ + case MLXFW_FSM_STATE_ERR_MAX: + MLXFW_ERR_MSG(mlxfw_dev, extack, "unknown error", err); + break; + }; + + return mlxfw_fsm_state_errno[fsm_state_err]; +}; static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, enum mlxfw_fsm_state fsm_state, @@ -62,21 +94,18 @@ static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, retry: err = mlxfw_dev->ops->fsm_query_state(mlxfw_dev, fwhandle, &curr_fsm_state, &fsm_state_err); - if (err) + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, "FSM state query failed", err); return err; - - if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) { - fsm_state_err = min_t(enum mlxfw_fsm_state_err, - fsm_state_err, MLXFW_FSM_STATE_ERR_MAX); - pr_err("Firmware flash failed: %s\n", - mlxfw_fsm_state_err_str[fsm_state_err]); - NL_SET_ERR_MSG_MOD(extack, "Firmware flash failed"); - return -EINVAL; } + + if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) + return mlxfw_fsm_state_err(mlxfw_dev, extack, fsm_state_err); + if (curr_fsm_state != fsm_state) { if (--times == 0) { - pr_err("Timeout reached on FSM state change"); - NL_SET_ERR_MSG_MOD(extack, "Timeout reached on FSM state change"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Timeout reached on FSM state change", -ETIMEDOUT); return -ETIMEDOUT; } msleep(MLXFW_FSM_STATE_WAIT_CYCLE_MS); @@ -85,6 +114,92 @@ retry: return 0; } +static int +mlxfw_fsm_reactivate_err(struct mlxfw_dev *mlxfw_dev, + struct netlink_ext_ack *extack, u8 err) +{ + enum mlxfw_fsm_reactivate_status status; + +#define MXFW_REACT_PRFX "Reactivate FSM: " +#define MLXFW_REACT_ERR(msg, err) \ + MLXFW_ERR_MSG(mlxfw_dev, extack, MXFW_REACT_PRFX msg, err) + + status = min_t(enum mlxfw_fsm_reactivate_status, err, + MLXFW_FSM_REACTIVATE_STATUS_MAX); + + switch (status) { + case MLXFW_FSM_REACTIVATE_STATUS_BUSY: + MLXFW_REACT_ERR("busy", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_PROHIBITED_FW_VER_ERR: + MLXFW_REACT_ERR("prohibited fw ver", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_COPY_FAILED: + MLXFW_REACT_ERR("first page copy failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_ERASE_FAILED: + MLXFW_REACT_ERR("first page erase failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FIRST_PAGE_RESTORE_FAILED: + MLXFW_REACT_ERR("first page restore failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_CANDIDATE_FW_DEACTIVATION_FAILED: + MLXFW_REACT_ERR("candidate fw deactivation failed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_ERR_DEVICE_RESET_REQUIRED: + MLXFW_REACT_ERR("device reset required", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_ERR_FW_PROGRAMMING_NEEDED: + MLXFW_REACT_ERR("fw programming needed", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_FW_ALREADY_ACTIVATED: + MLXFW_REACT_ERR("fw already activated", err); + break; + case MLXFW_FSM_REACTIVATE_STATUS_OK: /* fall through */ + case MLXFW_FSM_REACTIVATE_STATUS_MAX: + MLXFW_REACT_ERR("unexpected error", err); + break; + }; + return -EREMOTEIO; +}; + +static int mlxfw_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, + struct netlink_ext_ack *extack, + bool *supported) +{ + u8 status; + int err; + + if (!mlxfw_dev->ops->fsm_reactivate) + return 0; + + err = mlxfw_dev->ops->fsm_reactivate(mlxfw_dev, &status); + if (err == -EOPNOTSUPP) { + *supported = false; + return 0; + } + + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not reactivate firmware flash", err); + return err; + } + + if (status == MLXFW_FSM_REACTIVATE_STATUS_OK || + status == MLXFW_FSM_REACTIVATE_STATUS_FW_ALREADY_ACTIVATED) + return 0; + + return mlxfw_fsm_reactivate_err(mlxfw_dev, extack, status); +} + +static void mlxfw_status_notify(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes) +{ + devlink_flash_update_status_notify(mlxfw_dev->devlink, msg, comp_name, + done_bytes, total_bytes); +} + #define MLXFW_ALIGN_DOWN(x, align_bits) ((x) & ~((1 << (align_bits)) - 1)) #define MLXFW_ALIGN_UP(x, align_bits) \ MLXFW_ALIGN_DOWN((x) + ((1 << (align_bits)) - 1), (align_bits)) @@ -92,6 +207,7 @@ retry: static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, struct mlxfw_mfa2_component *comp, + bool reactivate_supp, struct netlink_ext_ack *extack) { u16 comp_max_write_size; @@ -108,34 +224,43 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, err = mlxfw_dev->ops->component_query(mlxfw_dev, comp->index, &comp_max_size, &comp_align_bits, &comp_max_write_size); - if (err) + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, "FSM component query failed", err); return err; + } comp_max_size = min_t(u32, comp_max_size, MLXFW_FSM_MAX_COMPONENT_SIZE); if (comp->data_size > comp_max_size) { - pr_err("Component %d is of size %d which is bigger than limit %d\n", - comp->index, comp->data_size, comp_max_size); - NL_SET_ERR_MSG_MOD(extack, "Component is bigger than limit"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Component size is bigger than limit", -EINVAL); return -EINVAL; } comp_max_write_size = MLXFW_ALIGN_DOWN(comp_max_write_size, comp_align_bits); - pr_debug("Component update\n"); + mlxfw_dbg(mlxfw_dev, "Component update\n"); mlxfw_status_notify(mlxfw_dev, "Updating component", comp_name, 0, 0); err = mlxfw_dev->ops->fsm_component_update(mlxfw_dev, fwhandle, comp->index, comp->data_size); - if (err) + if (err) { + if (!reactivate_supp) + MLXFW_ERR_MSG(mlxfw_dev, extack, + "FSM component update failed, FW reactivate is not supported", + err); + else + MLXFW_ERR_MSG(mlxfw_dev, extack, + "FSM component update failed", err); return err; + } err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_DOWNLOAD, extack); if (err) goto err_out; - pr_debug("Component download\n"); + mlxfw_dbg(mlxfw_dev, "Component download\n"); mlxfw_status_notify(mlxfw_dev, "Downloading component", comp_name, 0, comp->data_size); for (offset = 0; @@ -147,19 +272,25 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, err = mlxfw_dev->ops->fsm_block_download(mlxfw_dev, fwhandle, block_ptr, block_size, offset); - if (err) + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Component download failed", err); goto err_out; + } mlxfw_status_notify(mlxfw_dev, "Downloading component", comp_name, offset + block_size, comp->data_size); } - pr_debug("Component verify\n"); + mlxfw_dbg(mlxfw_dev, "Component verify\n"); mlxfw_status_notify(mlxfw_dev, "Verifying component", comp_name, 0, 0); err = mlxfw_dev->ops->fsm_component_verify(mlxfw_dev, fwhandle, comp->index); - if (err) + if (err) { + MLXFW_ERR_MSG(mlxfw_dev, extack, + "FSM component verify failed", err); goto err_out; + } err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED, extack); @@ -174,6 +305,7 @@ err_out: static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, struct mlxfw_mfa2_file *mfa2_file, + bool reactivate_supp, struct netlink_ext_ack *extack) { u32 component_count; @@ -184,8 +316,8 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, mlxfw_dev->psid_size, &component_count); if (err) { - pr_err("Could not find device PSID in MFA2 file\n"); - NL_SET_ERR_MSG_MOD(extack, "Could not find device PSID in MFA2 file"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not find device PSID in MFA2 file", err); return err; } @@ -194,11 +326,17 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, comp = mlxfw_mfa2_file_component_get(mfa2_file, mlxfw_dev->psid, mlxfw_dev->psid_size, i); - if (IS_ERR(comp)) - return PTR_ERR(comp); + if (IS_ERR(comp)) { + err = PTR_ERR(comp); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Failed to get MFA2 component", err); + return err; + } - pr_info("Flashing component type %d\n", comp->index); - err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp, extack); + mlxfw_info(mlxfw_dev, "Flashing component type %d\n", + comp->index); + err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp, + reactivate_supp, extack); mlxfw_mfa2_file_component_put(comp); if (err) return err; @@ -211,26 +349,32 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, struct netlink_ext_ack *extack) { struct mlxfw_mfa2_file *mfa2_file; + bool reactivate_supp = true; u32 fwhandle; int err; if (!mlxfw_mfa2_check(firmware)) { - pr_err("Firmware file is not MFA2\n"); - NL_SET_ERR_MSG_MOD(extack, "Firmware file is not MFA2"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Firmware file is not MFA2", -EINVAL); return -EINVAL; } mfa2_file = mlxfw_mfa2_file_init(firmware); - if (IS_ERR(mfa2_file)) - return PTR_ERR(mfa2_file); + if (IS_ERR(mfa2_file)) { + err = PTR_ERR(mfa2_file); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Failed to initialize MFA2 firmware file", err); + return err; + } - pr_info("Initialize firmware flash process\n"); + mlxfw_info(mlxfw_dev, "Initialize firmware flash process\n"); + devlink_flash_update_begin_notify(mlxfw_dev->devlink); mlxfw_status_notify(mlxfw_dev, "Initializing firmware flash process", NULL, 0, 0); err = mlxfw_dev->ops->fsm_lock(mlxfw_dev, &fwhandle); if (err) { - pr_err("Could not lock the firmware FSM\n"); - NL_SET_ERR_MSG_MOD(extack, "Could not lock the firmware FSM"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not lock the firmware FSM", err); goto err_fsm_lock; } @@ -239,16 +383,26 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, if (err) goto err_state_wait_idle_to_locked; - err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file, extack); + err = mlxfw_fsm_reactivate(mlxfw_dev, extack, &reactivate_supp); + if (err) + goto err_fsm_reactivate; + + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); + if (err) + goto err_state_wait_reactivate_to_locked; + + err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file, + reactivate_supp, extack); if (err) goto err_flash_components; - pr_debug("Activate image\n"); + mlxfw_dbg(mlxfw_dev, "Activate image\n"); mlxfw_status_notify(mlxfw_dev, "Activating image", NULL, 0, 0); err = mlxfw_dev->ops->fsm_activate(mlxfw_dev, fwhandle); if (err) { - pr_err("Could not activate the downloaded image\n"); - NL_SET_ERR_MSG_MOD(extack, "Could not activate the downloaded image"); + MLXFW_ERR_MSG(mlxfw_dev, extack, + "Could not activate the downloaded image", err); goto err_fsm_activate; } @@ -257,21 +411,25 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, if (err) goto err_state_wait_activate_to_locked; - pr_debug("Handle release\n"); + mlxfw_dbg(mlxfw_dev, "Handle release\n"); mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); - pr_info("Firmware flash done.\n"); + mlxfw_info(mlxfw_dev, "Firmware flash done\n"); mlxfw_status_notify(mlxfw_dev, "Firmware flash done", NULL, 0, 0); mlxfw_mfa2_file_fini(mfa2_file); + devlink_flash_update_end_notify(mlxfw_dev->devlink); return 0; err_state_wait_activate_to_locked: err_fsm_activate: err_flash_components: +err_state_wait_reactivate_to_locked: +err_fsm_reactivate: err_state_wait_idle_to_locked: mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); err_fsm_lock: mlxfw_mfa2_file_fini(mfa2_file); + devlink_flash_update_end_notify(mlxfw_dev->devlink); return err; } EXPORT_SYMBOL(mlxfw_firmware_flash); diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c index 79057af4fe99..5d9ddf36fb4e 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -496,7 +496,7 @@ mlxfw_mfa2_file_component_tlv_get(const struct mlxfw_mfa2_file *mfa2_file, struct mlxfw_mfa2_comp_data { struct mlxfw_mfa2_component comp; - u8 buff[0]; + u8 buff[]; }; static const struct mlxfw_mfa2_tlv_component_descriptor * diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h index 33c971190bba..2014a5de5a01 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h @@ -11,7 +11,7 @@ struct mlxfw_mfa2_tlv { u8 version; u8 type; __be16 len; - u8 data[0]; + u8 data[]; } __packed; static inline const struct mlxfw_mfa2_tlv * diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e9f791c43f20..e9ccd333f61d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -82,7 +82,7 @@ struct mlxsw_core { struct mlxsw_core_port *ports; unsigned int max_ports; bool fw_flash_in_progress; - unsigned long driver_priv[0]; + unsigned long driver_priv[]; /* driver_priv has to be always the last item */ }; @@ -142,6 +142,7 @@ struct mlxsw_rx_listener_item { struct list_head list; struct mlxsw_rx_listener rxl; void *priv; + bool enabled; }; struct mlxsw_event_listener_item { @@ -1197,6 +1198,72 @@ mlxsw_devlink_trap_group_init(struct devlink *devlink, return mlxsw_driver->trap_group_init(mlxsw_core, group); } +static int +mlxsw_devlink_trap_group_set(struct devlink *devlink, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_group_set) + return -EOPNOTSUPP; + return mlxsw_driver->trap_group_set(mlxsw_core, group, policer); +} + +static int +mlxsw_devlink_trap_policer_init(struct devlink *devlink, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_init) + return -EOPNOTSUPP; + return mlxsw_driver->trap_policer_init(mlxsw_core, policer); +} + +static void +mlxsw_devlink_trap_policer_fini(struct devlink *devlink, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_fini) + return; + mlxsw_driver->trap_policer_fini(mlxsw_core, policer); +} + +static int +mlxsw_devlink_trap_policer_set(struct devlink *devlink, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_set) + return -EOPNOTSUPP; + return mlxsw_driver->trap_policer_set(mlxsw_core, policer, rate, burst, + extack); +} + +static int +mlxsw_devlink_trap_policer_counter_get(struct devlink *devlink, + const struct devlink_trap_policer *policer, + u64 *p_drops) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_policer_counter_get) + return -EOPNOTSUPP; + return mlxsw_driver->trap_policer_counter_get(mlxsw_core, policer, + p_drops); +} + static const struct devlink_ops mlxsw_devlink_ops = { .reload_down = mlxsw_devlink_core_bus_device_reload_down, .reload_up = mlxsw_devlink_core_bus_device_reload_up, @@ -1219,6 +1286,11 @@ static const struct devlink_ops mlxsw_devlink_ops = { .trap_fini = mlxsw_devlink_trap_fini, .trap_action_set = mlxsw_devlink_trap_action_set, .trap_group_init = mlxsw_devlink_trap_group_init, + .trap_group_set = mlxsw_devlink_trap_group_set, + .trap_policer_init = mlxsw_devlink_trap_policer_init, + .trap_policer_fini = mlxsw_devlink_trap_policer_fini, + .trap_policer_set = mlxsw_devlink_trap_policer_set, + .trap_policer_counter_get = mlxsw_devlink_trap_policer_counter_get, }; static int @@ -1457,14 +1529,12 @@ static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a, static struct mlxsw_rx_listener_item * __find_rx_listener_item(struct mlxsw_core *mlxsw_core, - const struct mlxsw_rx_listener *rxl, - void *priv) + const struct mlxsw_rx_listener *rxl) { struct mlxsw_rx_listener_item *rxl_item; list_for_each_entry(rxl_item, &mlxsw_core->rx_listener_list, list) { - if (__is_rx_listener_equal(&rxl_item->rxl, rxl) && - rxl_item->priv == priv) + if (__is_rx_listener_equal(&rxl_item->rxl, rxl)) return rxl_item; } return NULL; @@ -1472,11 +1542,11 @@ __find_rx_listener_item(struct mlxsw_core *mlxsw_core, int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_rx_listener *rxl, - void *priv) + void *priv, bool enabled) { struct mlxsw_rx_listener_item *rxl_item; - rxl_item = __find_rx_listener_item(mlxsw_core, rxl, priv); + rxl_item = __find_rx_listener_item(mlxsw_core, rxl); if (rxl_item) return -EEXIST; rxl_item = kmalloc(sizeof(*rxl_item), GFP_KERNEL); @@ -1484,6 +1554,7 @@ int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, return -ENOMEM; rxl_item->rxl = *rxl; rxl_item->priv = priv; + rxl_item->enabled = enabled; list_add_rcu(&rxl_item->list, &mlxsw_core->rx_listener_list); return 0; @@ -1491,12 +1562,11 @@ int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, EXPORT_SYMBOL(mlxsw_core_rx_listener_register); void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core, - const struct mlxsw_rx_listener *rxl, - void *priv) + const struct mlxsw_rx_listener *rxl) { struct mlxsw_rx_listener_item *rxl_item; - rxl_item = __find_rx_listener_item(mlxsw_core, rxl, priv); + rxl_item = __find_rx_listener_item(mlxsw_core, rxl); if (!rxl_item) return; list_del_rcu(&rxl_item->list); @@ -1505,6 +1575,19 @@ void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_rx_listener_unregister); +static void +mlxsw_core_rx_listener_state_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl, + bool enabled) +{ + struct mlxsw_rx_listener_item *rxl_item; + + rxl_item = __find_rx_listener_item(mlxsw_core, rxl); + if (WARN_ON(!rxl_item)) + return; + rxl_item->enabled = enabled; +} + static void mlxsw_core_event_listener_func(struct sk_buff *skb, u8 local_port, void *priv) { @@ -1534,14 +1617,12 @@ static bool __is_event_listener_equal(const struct mlxsw_event_listener *el_a, static struct mlxsw_event_listener_item * __find_event_listener_item(struct mlxsw_core *mlxsw_core, - const struct mlxsw_event_listener *el, - void *priv) + const struct mlxsw_event_listener *el) { struct mlxsw_event_listener_item *el_item; list_for_each_entry(el_item, &mlxsw_core->event_listener_list, list) { - if (__is_event_listener_equal(&el_item->el, el) && - el_item->priv == priv) + if (__is_event_listener_equal(&el_item->el, el)) return el_item; } return NULL; @@ -1559,7 +1640,7 @@ int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core, .trap_id = el->trap_id, }; - el_item = __find_event_listener_item(mlxsw_core, el, priv); + el_item = __find_event_listener_item(mlxsw_core, el); if (el_item) return -EEXIST; el_item = kmalloc(sizeof(*el_item), GFP_KERNEL); @@ -1568,7 +1649,7 @@ int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core, el_item->el = *el; el_item->priv = priv; - err = mlxsw_core_rx_listener_register(mlxsw_core, &rxl, el_item); + err = mlxsw_core_rx_listener_register(mlxsw_core, &rxl, el_item, true); if (err) goto err_rx_listener_register; @@ -1586,8 +1667,7 @@ err_rx_listener_register: EXPORT_SYMBOL(mlxsw_core_event_listener_register); void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, - const struct mlxsw_event_listener *el, - void *priv) + const struct mlxsw_event_listener *el) { struct mlxsw_event_listener_item *el_item; const struct mlxsw_rx_listener rxl = { @@ -1596,10 +1676,10 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, .trap_id = el->trap_id, }; - el_item = __find_event_listener_item(mlxsw_core, el, priv); + el_item = __find_event_listener_item(mlxsw_core, el); if (!el_item) return; - mlxsw_core_rx_listener_unregister(mlxsw_core, &rxl, el_item); + mlxsw_core_rx_listener_unregister(mlxsw_core, &rxl); list_del(&el_item->list); kfree(el_item); } @@ -1607,16 +1687,18 @@ EXPORT_SYMBOL(mlxsw_core_event_listener_unregister); static int mlxsw_core_listener_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, - void *priv) + void *priv, bool enabled) { - if (listener->is_event) + if (listener->is_event) { + WARN_ON(!enabled); return mlxsw_core_event_listener_register(mlxsw_core, - &listener->u.event_listener, + &listener->event_listener, priv); - else + } else { return mlxsw_core_rx_listener_register(mlxsw_core, - &listener->u.rx_listener, - priv); + &listener->rx_listener, + priv, enabled); + } } static void mlxsw_core_listener_unregister(struct mlxsw_core *mlxsw_core, @@ -1625,26 +1707,31 @@ static void mlxsw_core_listener_unregister(struct mlxsw_core *mlxsw_core, { if (listener->is_event) mlxsw_core_event_listener_unregister(mlxsw_core, - &listener->u.event_listener, - priv); + &listener->event_listener); else mlxsw_core_rx_listener_unregister(mlxsw_core, - &listener->u.rx_listener, - priv); + &listener->rx_listener); } int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, void *priv) { + enum mlxsw_reg_htgt_trap_group trap_group; + enum mlxsw_reg_hpkt_action action; char hpkt_pl[MLXSW_REG_HPKT_LEN]; int err; - err = mlxsw_core_listener_register(mlxsw_core, listener, priv); + err = mlxsw_core_listener_register(mlxsw_core, listener, priv, + listener->enabled_on_register); if (err) return err; - mlxsw_reg_hpkt_pack(hpkt_pl, listener->action, listener->trap_id, - listener->trap_group, listener->is_ctrl); + action = listener->enabled_on_register ? listener->en_action : + listener->dis_action; + trap_group = listener->enabled_on_register ? listener->en_trap_group : + listener->dis_trap_group; + mlxsw_reg_hpkt_pack(hpkt_pl, action, listener->trap_id, + trap_group, listener->is_ctrl); err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); if (err) goto err_trap_set; @@ -1664,8 +1751,8 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, char hpkt_pl[MLXSW_REG_HPKT_LEN]; if (!listener->is_event) { - mlxsw_reg_hpkt_pack(hpkt_pl, listener->unreg_action, - listener->trap_id, listener->trap_group, + mlxsw_reg_hpkt_pack(hpkt_pl, listener->dis_action, + listener->trap_id, listener->dis_trap_group, listener->is_ctrl); mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); } @@ -1674,17 +1761,33 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_trap_unregister); -int mlxsw_core_trap_action_set(struct mlxsw_core *mlxsw_core, - const struct mlxsw_listener *listener, - enum mlxsw_reg_hpkt_action action) +int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + bool enabled) { + enum mlxsw_reg_htgt_trap_group trap_group; + enum mlxsw_reg_hpkt_action action; char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int err; + + /* Not supported for event listener */ + if (WARN_ON(listener->is_event)) + return -EINVAL; + action = enabled ? listener->en_action : listener->dis_action; + trap_group = enabled ? listener->en_trap_group : + listener->dis_trap_group; mlxsw_reg_hpkt_pack(hpkt_pl, action, listener->trap_id, - listener->trap_group, listener->is_ctrl); - return mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); + trap_group, listener->is_ctrl); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); + if (err) + return err; + + mlxsw_core_rx_listener_state_set(mlxsw_core, &listener->rx_listener, + enabled); + return 0; } -EXPORT_SYMBOL(mlxsw_core_trap_action_set); +EXPORT_SYMBOL(mlxsw_core_trap_state_set); static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core) { @@ -1942,7 +2045,8 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, if ((rxl->local_port == MLXSW_PORT_DONT_CARE || rxl->local_port == local_port) && rxl->trap_id == rx_info->trap_id) { - found = true; + if (rxl_item->enabled) + found = true; break; } } @@ -2168,13 +2272,22 @@ int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module) /* Here we need to get the module width according to the module type. */ switch (module_type) { + case MLXSW_REG_PMTM_MODULE_TYPE_C2C8X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_OSFP: + return 8; + case MLXSW_REG_PMTM_MODULE_TYPE_C2C4X: /* fall through */ case MLXSW_REG_PMTM_MODULE_TYPE_BP_4X: /* fall through */ - case MLXSW_REG_PMTM_MODULE_TYPE_BP_QSFP: + case MLXSW_REG_PMTM_MODULE_TYPE_QSFP: return 4; - case MLXSW_REG_PMTM_MODULE_TYPE_BP_2X: + case MLXSW_REG_PMTM_MODULE_TYPE_C2C2X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_BP_2X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_DSFP: return 2; - case MLXSW_REG_PMTM_MODULE_TYPE_BP_SFP: /* fall through */ - case MLXSW_REG_PMTM_MODULE_TYPE_BP_1X: + case MLXSW_REG_PMTM_MODULE_TYPE_C2C1X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_BP_1X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_SFP: return 1; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 543476a2e503..22b0dfa7cfae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -62,7 +62,6 @@ struct mlxsw_rx_listener { void (*func)(struct sk_buff *skb, u8 local_port, void *priv); u8 local_port; u16 trap_id; - enum mlxsw_reg_hpkt_action action; }; struct mlxsw_event_listener { @@ -76,58 +75,71 @@ struct mlxsw_listener { union { struct mlxsw_rx_listener rx_listener; struct mlxsw_event_listener event_listener; - } u; - enum mlxsw_reg_hpkt_action action; - enum mlxsw_reg_hpkt_action unreg_action; - u8 trap_group; - bool is_ctrl; /* should go via control buffer or not */ - bool is_event; + }; + enum mlxsw_reg_hpkt_action en_action; /* Action when enabled */ + enum mlxsw_reg_hpkt_action dis_action; /* Action when disabled */ + u8 en_trap_group; /* Trap group when enabled */ + u8 dis_trap_group; /* Trap group when disabled */ + u8 is_ctrl:1, /* should go via control buffer or not */ + is_event:1, + enabled_on_register:1; /* Trap should be enabled when listener + * is registered. + */ }; -#define MLXSW_RXL(_func, _trap_id, _action, _is_ctrl, _trap_group, \ - _unreg_action) \ - { \ - .trap_id = MLXSW_TRAP_ID_##_trap_id, \ - .u.rx_listener = \ - { \ - .func = _func, \ - .local_port = MLXSW_PORT_DONT_CARE, \ - .trap_id = MLXSW_TRAP_ID_##_trap_id, \ - }, \ - .action = MLXSW_REG_HPKT_ACTION_##_action, \ - .unreg_action = MLXSW_REG_HPKT_ACTION_##_unreg_action, \ - .trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group, \ - .is_ctrl = _is_ctrl, \ - .is_event = false, \ +#define __MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _en_trap_group, \ + _dis_action, _enabled_on_register, _dis_trap_group) \ + { \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + .rx_listener = \ + { \ + .func = _func, \ + .local_port = MLXSW_PORT_DONT_CARE, \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + }, \ + .en_action = MLXSW_REG_HPKT_ACTION_##_en_action, \ + .dis_action = MLXSW_REG_HPKT_ACTION_##_dis_action, \ + .en_trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_en_trap_group, \ + .dis_trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_dis_trap_group, \ + .is_ctrl = _is_ctrl, \ + .enabled_on_register = _enabled_on_register, \ } -#define MLXSW_EVENTL(_func, _trap_id, _trap_group) \ - { \ - .trap_id = MLXSW_TRAP_ID_##_trap_id, \ - .u.event_listener = \ - { \ - .func = _func, \ - .trap_id = MLXSW_TRAP_ID_##_trap_id, \ - }, \ - .action = MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, \ - .trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group, \ - .is_ctrl = false, \ - .is_event = true, \ +#define MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _trap_group, \ + _dis_action) \ + __MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _trap_group, \ + _dis_action, true, _trap_group) + +#define MLXSW_RXL_DIS(_func, _trap_id, _en_action, _is_ctrl, _en_trap_group, \ + _dis_action, _dis_trap_group) \ + __MLXSW_RXL(_func, _trap_id, _en_action, _is_ctrl, _en_trap_group, \ + _dis_action, false, _dis_trap_group) + +#define MLXSW_EVENTL(_func, _trap_id, _trap_group) \ + { \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + .event_listener = \ + { \ + .func = _func, \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + }, \ + .en_action = MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, \ + .en_trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group, \ + .is_event = true, \ + .enabled_on_register = true, \ } int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_rx_listener *rxl, - void *priv); + void *priv, bool enabled); void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core, - const struct mlxsw_rx_listener *rxl, - void *priv); + const struct mlxsw_rx_listener *rxl); int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_event_listener *el, void *priv); void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, - const struct mlxsw_event_listener *el, - void *priv); + const struct mlxsw_event_listener *el); int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, @@ -135,9 +147,9 @@ int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, void *priv); -int mlxsw_core_trap_action_set(struct mlxsw_core *mlxsw_core, - const struct mlxsw_listener *listener, - enum mlxsw_reg_hpkt_action action); +int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + bool enabled); typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload, size_t payload_len, unsigned long cb_priv); @@ -315,6 +327,20 @@ struct mlxsw_driver { enum devlink_trap_action action); int (*trap_group_init)(struct mlxsw_core *mlxsw_core, const struct devlink_trap_group *group); + int (*trap_group_set)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer); + int (*trap_policer_init)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); + void (*trap_policer_fini)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); + int (*trap_policer_set)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, + struct netlink_ext_ack *extack); + int (*trap_policer_counter_get)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 *p_drops); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); int (*resources_register)(struct mlxsw_core *mlxsw_core); @@ -461,7 +487,10 @@ enum mlxsw_devlink_param_id { }; struct mlxsw_skb_cb { - struct mlxsw_tx_info tx_info; + union { + struct mlxsw_tx_info tx_info; + u32 cookie_index; /* Only used during receive */ + }; }; static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index c51b2adfc1e1..70a104e728f6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -7,6 +7,9 @@ #include <linux/errno.h> #include <linux/rhashtable.h> #include <linux/list.h> +#include <linux/idr.h> +#include <linux/refcount.h> +#include <net/flow_offload.h> #include "item.h" #include "trap.h" @@ -63,6 +66,8 @@ struct mlxsw_afa { void *ops_priv; struct rhashtable set_ht; struct rhashtable fwd_entry_ht; + struct rhashtable cookie_ht; + struct idr cookie_idr; }; #define MLXSW_AFA_SET_LEN 0xA8 @@ -121,6 +126,55 @@ static const struct rhashtable_params mlxsw_afa_fwd_entry_ht_params = { .automatic_shrinking = true, }; +struct mlxsw_afa_cookie { + struct rhash_head ht_node; + refcount_t ref_count; + struct rcu_head rcu; + u32 cookie_index; + struct flow_action_cookie fa_cookie; +}; + +static u32 mlxsw_afa_cookie_hash(const struct flow_action_cookie *fa_cookie, + u32 seed) +{ + return jhash2((u32 *) fa_cookie->cookie, + fa_cookie->cookie_len / sizeof(u32), seed); +} + +static u32 mlxsw_afa_cookie_key_hashfn(const void *data, u32 len, u32 seed) +{ + const struct flow_action_cookie *fa_cookie = data; + + return mlxsw_afa_cookie_hash(fa_cookie, seed); +} + +static u32 mlxsw_afa_cookie_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_afa_cookie *cookie = data; + + return mlxsw_afa_cookie_hash(&cookie->fa_cookie, seed); +} + +static int mlxsw_afa_cookie_obj_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct flow_action_cookie *fa_cookie = arg->key; + const struct mlxsw_afa_cookie *cookie = obj; + + if (cookie->fa_cookie.cookie_len == fa_cookie->cookie_len) + return memcmp(cookie->fa_cookie.cookie, fa_cookie->cookie, + fa_cookie->cookie_len); + return 1; +} + +static const struct rhashtable_params mlxsw_afa_cookie_ht_params = { + .head_offset = offsetof(struct mlxsw_afa_cookie, ht_node), + .hashfn = mlxsw_afa_cookie_key_hashfn, + .obj_hashfn = mlxsw_afa_cookie_obj_hashfn, + .obj_cmpfn = mlxsw_afa_cookie_obj_cmpfn, + .automatic_shrinking = true, +}; + struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set, const struct mlxsw_afa_ops *ops, void *ops_priv) @@ -138,11 +192,18 @@ struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set, &mlxsw_afa_fwd_entry_ht_params); if (err) goto err_fwd_entry_rhashtable_init; + err = rhashtable_init(&mlxsw_afa->cookie_ht, + &mlxsw_afa_cookie_ht_params); + if (err) + goto err_cookie_rhashtable_init; + idr_init(&mlxsw_afa->cookie_idr); mlxsw_afa->max_acts_per_set = max_acts_per_set; mlxsw_afa->ops = ops; mlxsw_afa->ops_priv = ops_priv; return mlxsw_afa; +err_cookie_rhashtable_init: + rhashtable_destroy(&mlxsw_afa->fwd_entry_ht); err_fwd_entry_rhashtable_init: rhashtable_destroy(&mlxsw_afa->set_ht); err_set_rhashtable_init: @@ -153,6 +214,9 @@ EXPORT_SYMBOL(mlxsw_afa_create); void mlxsw_afa_destroy(struct mlxsw_afa *mlxsw_afa) { + WARN_ON(!idr_is_empty(&mlxsw_afa->cookie_idr)); + idr_destroy(&mlxsw_afa->cookie_idr); + rhashtable_destroy(&mlxsw_afa->cookie_ht); rhashtable_destroy(&mlxsw_afa->fwd_entry_ht); rhashtable_destroy(&mlxsw_afa->set_ht); kfree(mlxsw_afa); @@ -627,6 +691,151 @@ err_counter_index_get: return ERR_PTR(err); } +/* 20 bits is a maximum that hardware can handle in trap with userdef action + * and carry along with the trapped packet. + */ +#define MLXSW_AFA_COOKIE_INDEX_BITS 20 +#define MLXSW_AFA_COOKIE_INDEX_MAX ((1 << MLXSW_AFA_COOKIE_INDEX_BITS) - 1) + +static struct mlxsw_afa_cookie * +mlxsw_afa_cookie_create(struct mlxsw_afa *mlxsw_afa, + const struct flow_action_cookie *fa_cookie) +{ + struct mlxsw_afa_cookie *cookie; + u32 cookie_index; + int err; + + cookie = kzalloc(sizeof(*cookie) + fa_cookie->cookie_len, GFP_KERNEL); + if (!cookie) + return ERR_PTR(-ENOMEM); + refcount_set(&cookie->ref_count, 1); + memcpy(&cookie->fa_cookie, fa_cookie, + sizeof(*fa_cookie) + fa_cookie->cookie_len); + + err = rhashtable_insert_fast(&mlxsw_afa->cookie_ht, &cookie->ht_node, + mlxsw_afa_cookie_ht_params); + if (err) + goto err_rhashtable_insert; + + /* Start cookie indexes with 1. Leave the 0 index unused. Packets + * that come from the HW which are not dropped by drop-with-cookie + * action are going to pass cookie_index 0 to lookup. + */ + cookie_index = 1; + err = idr_alloc_u32(&mlxsw_afa->cookie_idr, cookie, &cookie_index, + MLXSW_AFA_COOKIE_INDEX_MAX, GFP_KERNEL); + if (err) + goto err_idr_alloc; + cookie->cookie_index = cookie_index; + return cookie; + +err_idr_alloc: + rhashtable_remove_fast(&mlxsw_afa->cookie_ht, &cookie->ht_node, + mlxsw_afa_cookie_ht_params); +err_rhashtable_insert: + kfree(cookie); + return ERR_PTR(err); +} + +static void mlxsw_afa_cookie_destroy(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_cookie *cookie) +{ + idr_remove(&mlxsw_afa->cookie_idr, cookie->cookie_index); + rhashtable_remove_fast(&mlxsw_afa->cookie_ht, &cookie->ht_node, + mlxsw_afa_cookie_ht_params); + kfree_rcu(cookie, rcu); +} + +static struct mlxsw_afa_cookie * +mlxsw_afa_cookie_get(struct mlxsw_afa *mlxsw_afa, + const struct flow_action_cookie *fa_cookie) +{ + struct mlxsw_afa_cookie *cookie; + + cookie = rhashtable_lookup_fast(&mlxsw_afa->cookie_ht, fa_cookie, + mlxsw_afa_cookie_ht_params); + if (cookie) { + refcount_inc(&cookie->ref_count); + return cookie; + } + return mlxsw_afa_cookie_create(mlxsw_afa, fa_cookie); +} + +static void mlxsw_afa_cookie_put(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_cookie *cookie) +{ + if (!refcount_dec_and_test(&cookie->ref_count)) + return; + mlxsw_afa_cookie_destroy(mlxsw_afa, cookie); +} + +/* RCU read lock must be held */ +const struct flow_action_cookie * +mlxsw_afa_cookie_lookup(struct mlxsw_afa *mlxsw_afa, u32 cookie_index) +{ + struct mlxsw_afa_cookie *cookie; + + /* 0 index means no cookie */ + if (!cookie_index) + return NULL; + cookie = idr_find(&mlxsw_afa->cookie_idr, cookie_index); + if (!cookie) + return NULL; + return &cookie->fa_cookie; +} +EXPORT_SYMBOL(mlxsw_afa_cookie_lookup); + +struct mlxsw_afa_cookie_ref { + struct mlxsw_afa_resource resource; + struct mlxsw_afa_cookie *cookie; +}; + +static void +mlxsw_afa_cookie_ref_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_cookie_ref *cookie_ref) +{ + mlxsw_afa_resource_del(&cookie_ref->resource); + mlxsw_afa_cookie_put(block->afa, cookie_ref->cookie); + kfree(cookie_ref); +} + +static void +mlxsw_afa_cookie_ref_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_cookie_ref *cookie_ref; + + cookie_ref = container_of(resource, struct mlxsw_afa_cookie_ref, + resource); + mlxsw_afa_cookie_ref_destroy(block, cookie_ref); +} + +static struct mlxsw_afa_cookie_ref * +mlxsw_afa_cookie_ref_create(struct mlxsw_afa_block *block, + const struct flow_action_cookie *fa_cookie) +{ + struct mlxsw_afa_cookie_ref *cookie_ref; + struct mlxsw_afa_cookie *cookie; + int err; + + cookie_ref = kzalloc(sizeof(*cookie_ref), GFP_KERNEL); + if (!cookie_ref) + return ERR_PTR(-ENOMEM); + cookie = mlxsw_afa_cookie_get(block->afa, fa_cookie); + if (IS_ERR(cookie)) { + err = PTR_ERR(cookie); + goto err_cookie_get; + } + cookie_ref->cookie = cookie; + cookie_ref->resource.destructor = mlxsw_afa_cookie_ref_destructor; + mlxsw_afa_resource_add(block, &cookie_ref->resource); + return cookie_ref; + +err_cookie_get: + kfree(cookie_ref); + return ERR_PTR(err); +} + #define MLXSW_AFA_ONE_ACTION_LEN 32 #define MLXSW_AFA_PAYLOAD_OFFSET 4 @@ -747,97 +956,170 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, } EXPORT_SYMBOL(mlxsw_afa_block_append_vlan_modify); -/* Trap / Discard Action - * --------------------- - * The Trap / Discard action enables trapping / mirroring packets to the CPU +/* Trap Action / Trap With Userdef Action + * -------------------------------------- + * The Trap action enables trapping / mirroring packets to the CPU * as well as discarding packets. * The ACL Trap / Discard separates the forward/discard control from CPU * trap control. In addition, the Trap / Discard action enables activating * SPAN (port mirroring). + * + * The Trap with userdef action action has the same functionality as + * the Trap action with addition of user defined value that can be set + * and used by higher layer applications. */ -#define MLXSW_AFA_TRAPDISC_CODE 0x03 -#define MLXSW_AFA_TRAPDISC_SIZE 1 +#define MLXSW_AFA_TRAP_CODE 0x03 +#define MLXSW_AFA_TRAP_SIZE 1 -enum mlxsw_afa_trapdisc_trap_action { - MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP = 0, - MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP = 2, +#define MLXSW_AFA_TRAPWU_CODE 0x04 +#define MLXSW_AFA_TRAPWU_SIZE 2 + +enum mlxsw_afa_trap_trap_action { + MLXSW_AFA_TRAP_TRAP_ACTION_NOP = 0, + MLXSW_AFA_TRAP_TRAP_ACTION_TRAP = 2, }; -/* afa_trapdisc_trap_action +/* afa_trap_trap_action * Trap Action. */ -MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4); +MLXSW_ITEM32(afa, trap, trap_action, 0x00, 24, 4); -enum mlxsw_afa_trapdisc_forward_action { - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD = 1, - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3, +enum mlxsw_afa_trap_forward_action { + MLXSW_AFA_TRAP_FORWARD_ACTION_FORWARD = 1, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD = 3, }; -/* afa_trapdisc_forward_action +/* afa_trap_forward_action * Forward Action. */ -MLXSW_ITEM32(afa, trapdisc, forward_action, 0x00, 0, 4); +MLXSW_ITEM32(afa, trap, forward_action, 0x00, 0, 4); -/* afa_trapdisc_trap_id +/* afa_trap_trap_id * Trap ID to configure. */ -MLXSW_ITEM32(afa, trapdisc, trap_id, 0x04, 0, 9); +MLXSW_ITEM32(afa, trap, trap_id, 0x04, 0, 9); -/* afa_trapdisc_mirror_agent +/* afa_trap_mirror_agent * Mirror agent. */ -MLXSW_ITEM32(afa, trapdisc, mirror_agent, 0x08, 29, 3); +MLXSW_ITEM32(afa, trap, mirror_agent, 0x08, 29, 3); -/* afa_trapdisc_mirror_enable +/* afa_trap_mirror_enable * Mirror enable. */ -MLXSW_ITEM32(afa, trapdisc, mirror_enable, 0x08, 24, 1); +MLXSW_ITEM32(afa, trap, mirror_enable, 0x08, 24, 1); + +/* user_def_val + * Value for the SW usage. Can be used to pass information of which + * rule has caused a trap. This may be overwritten by later traps. + * This field does a set on the packet's user_def_val only if this + * is the first trap_id or if the trap_id has replaced the previous + * packet's trap_id. + */ +MLXSW_ITEM32(afa, trap, user_def_val, 0x0C, 0, 20); static inline void -mlxsw_afa_trapdisc_pack(char *payload, - enum mlxsw_afa_trapdisc_trap_action trap_action, - enum mlxsw_afa_trapdisc_forward_action forward_action, - u16 trap_id) +mlxsw_afa_trap_pack(char *payload, + enum mlxsw_afa_trap_trap_action trap_action, + enum mlxsw_afa_trap_forward_action forward_action, + u16 trap_id) { - mlxsw_afa_trapdisc_trap_action_set(payload, trap_action); - mlxsw_afa_trapdisc_forward_action_set(payload, forward_action); - mlxsw_afa_trapdisc_trap_id_set(payload, trap_id); + mlxsw_afa_trap_trap_action_set(payload, trap_action); + mlxsw_afa_trap_forward_action_set(payload, forward_action); + mlxsw_afa_trap_trap_id_set(payload, trap_id); } static inline void -mlxsw_afa_trapdisc_mirror_pack(char *payload, bool mirror_enable, - u8 mirror_agent) +mlxsw_afa_trapwu_pack(char *payload, + enum mlxsw_afa_trap_trap_action trap_action, + enum mlxsw_afa_trap_forward_action forward_action, + u16 trap_id, u32 user_def_val) { - mlxsw_afa_trapdisc_mirror_enable_set(payload, mirror_enable); - mlxsw_afa_trapdisc_mirror_agent_set(payload, mirror_agent); + mlxsw_afa_trap_pack(payload, trap_action, forward_action, trap_id); + mlxsw_afa_trap_user_def_val_set(payload, user_def_val); } -int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) +static inline void +mlxsw_afa_trap_mirror_pack(char *payload, bool mirror_enable, + u8 mirror_agent) { - char *act = mlxsw_afa_block_append_action(block, - MLXSW_AFA_TRAPDISC_CODE, - MLXSW_AFA_TRAPDISC_SIZE); + mlxsw_afa_trap_mirror_enable_set(payload, mirror_enable); + mlxsw_afa_trap_mirror_agent_set(payload, mirror_agent); +} + +static int mlxsw_afa_block_append_drop_plain(struct mlxsw_afa_block *block, + bool ingress) +{ + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); if (IS_ERR(act)) return PTR_ERR(act); - mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 0); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD, + ingress ? MLXSW_TRAP_ID_DISCARD_INGRESS_ACL : + MLXSW_TRAP_ID_DISCARD_EGRESS_ACL); return 0; } + +static int +mlxsw_afa_block_append_drop_with_cookie(struct mlxsw_afa_block *block, + bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_cookie_ref *cookie_ref; + u32 cookie_index; + char *act; + int err; + + cookie_ref = mlxsw_afa_cookie_ref_create(block, fa_cookie); + if (IS_ERR(cookie_ref)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot create cookie for drop action"); + return PTR_ERR(cookie_ref); + } + cookie_index = cookie_ref->cookie->cookie_index; + + act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAPWU_CODE, + MLXSW_AFA_TRAPWU_SIZE); + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append drop with cookie action"); + err = PTR_ERR(act); + goto err_append_action; + } + mlxsw_afa_trapwu_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD, + ingress ? MLXSW_TRAP_ID_DISCARD_INGRESS_ACL : + MLXSW_TRAP_ID_DISCARD_EGRESS_ACL, + cookie_index); + return 0; + +err_append_action: + mlxsw_afa_cookie_ref_destroy(block, cookie_ref); + return err; +} + +int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block, bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack) +{ + return fa_cookie ? + mlxsw_afa_block_append_drop_with_cookie(block, ingress, + fa_cookie, extack) : + mlxsw_afa_block_append_drop_plain(block, ingress); +} EXPORT_SYMBOL(mlxsw_afa_block_append_drop); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) { - char *act = mlxsw_afa_block_append_action(block, - MLXSW_AFA_TRAPDISC_CODE, - MLXSW_AFA_TRAPDISC_SIZE); + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); if (IS_ERR(act)) return PTR_ERR(act); - mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, - trap_id); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_DISCARD, trap_id); return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_trap); @@ -845,15 +1127,13 @@ EXPORT_SYMBOL(mlxsw_afa_block_append_trap); int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, u16 trap_id) { - char *act = mlxsw_afa_block_append_action(block, - MLXSW_AFA_TRAPDISC_CODE, - MLXSW_AFA_TRAPDISC_SIZE); + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); if (IS_ERR(act)) return PTR_ERR(act); - mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, - trap_id); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAP_FORWARD_ACTION_FORWARD, trap_id); return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward); @@ -920,13 +1200,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block, u8 mirror_agent) { char *act = mlxsw_afa_block_append_action(block, - MLXSW_AFA_TRAPDISC_CODE, - MLXSW_AFA_TRAPDISC_SIZE); + MLXSW_AFA_TRAP_CODE, + MLXSW_AFA_TRAP_SIZE); if (IS_ERR(act)) return PTR_ERR(act); - mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, - MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 0); - mlxsw_afa_trapdisc_mirror_pack(act, true, mirror_agent); + mlxsw_afa_trap_pack(act, MLXSW_AFA_TRAP_TRAP_ACTION_NOP, + MLXSW_AFA_TRAP_FORWARD_ACTION_FORWARD, 0); + mlxsw_afa_trap_mirror_pack(act, true, mirror_agent); return 0; } @@ -958,6 +1238,179 @@ err_append_allocated_mirror: } EXPORT_SYMBOL(mlxsw_afa_block_append_mirror); +/* QoS Action + * ---------- + * The QOS_ACTION is used for manipulating the QoS attributes of a packet. It + * can be used to change the DCSP, ECN, Color and Switch Priority of the packet. + * Note that PCP field can be changed using the VLAN action. + */ + +#define MLXSW_AFA_QOS_CODE 0x06 +#define MLXSW_AFA_QOS_SIZE 1 + +enum mlxsw_afa_qos_ecn_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_ECN_CMD_NOP, + /* Set ECN to afa_qos_ecn */ + MLXSW_AFA_QOS_ECN_CMD_SET, +}; + +/* afa_qos_ecn_cmd + */ +MLXSW_ITEM32(afa, qos, ecn_cmd, 0x04, 29, 3); + +/* afa_qos_ecn + * ECN value. + */ +MLXSW_ITEM32(afa, qos, ecn, 0x04, 24, 2); + +enum mlxsw_afa_qos_dscp_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_DSCP_CMD_NOP, + /* Set DSCP 3 LSB bits according to dscp[2:0] */ + MLXSW_AFA_QOS_DSCP_CMD_SET_3LSB, + /* Set DSCP 3 MSB bits according to dscp[5:3] */ + MLXSW_AFA_QOS_DSCP_CMD_SET_3MSB, + /* Set DSCP 6 bits according to dscp[5:0] */ + MLXSW_AFA_QOS_DSCP_CMD_SET_ALL, +}; + +/* afa_qos_dscp_cmd + * DSCP command. + */ +MLXSW_ITEM32(afa, qos, dscp_cmd, 0x04, 14, 2); + +/* afa_qos_dscp + * DSCP value. + */ +MLXSW_ITEM32(afa, qos, dscp, 0x04, 0, 6); + +enum mlxsw_afa_qos_switch_prio_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_SWITCH_PRIO_CMD_NOP, + /* Set Switch Priority to afa_qos_switch_prio */ + MLXSW_AFA_QOS_SWITCH_PRIO_CMD_SET, +}; + +/* afa_qos_switch_prio_cmd + */ +MLXSW_ITEM32(afa, qos, switch_prio_cmd, 0x08, 14, 2); + +/* afa_qos_switch_prio + * Switch Priority. + */ +MLXSW_ITEM32(afa, qos, switch_prio, 0x08, 0, 4); + +enum mlxsw_afa_qos_dscp_rw { + MLXSW_AFA_QOS_DSCP_RW_PRESERVE, + MLXSW_AFA_QOS_DSCP_RW_SET, + MLXSW_AFA_QOS_DSCP_RW_CLEAR, +}; + +/* afa_qos_dscp_rw + * DSCP Re-write Enable. Controlling the rewrite_enable for DSCP. + */ +MLXSW_ITEM32(afa, qos, dscp_rw, 0x0C, 30, 2); + +static inline void +mlxsw_afa_qos_ecn_pack(char *payload, + enum mlxsw_afa_qos_ecn_cmd ecn_cmd, u8 ecn) +{ + mlxsw_afa_qos_ecn_cmd_set(payload, ecn_cmd); + mlxsw_afa_qos_ecn_set(payload, ecn); +} + +static inline void +mlxsw_afa_qos_dscp_pack(char *payload, + enum mlxsw_afa_qos_dscp_cmd dscp_cmd, u8 dscp) +{ + mlxsw_afa_qos_dscp_cmd_set(payload, dscp_cmd); + mlxsw_afa_qos_dscp_set(payload, dscp); +} + +static inline void +mlxsw_afa_qos_switch_prio_pack(char *payload, + enum mlxsw_afa_qos_switch_prio_cmd prio_cmd, + u8 prio) +{ + mlxsw_afa_qos_switch_prio_cmd_set(payload, prio_cmd); + mlxsw_afa_qos_switch_prio_set(payload, prio); +} + +static int __mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block, + bool set_dscp, u8 dscp, + bool set_ecn, u8 ecn, + struct netlink_ext_ack *extack) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_QOS_CODE, + MLXSW_AFA_QOS_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append QOS action"); + return PTR_ERR(act); + } + + if (set_ecn) + mlxsw_afa_qos_ecn_pack(act, MLXSW_AFA_QOS_ECN_CMD_SET, ecn); + if (set_dscp) { + mlxsw_afa_qos_dscp_pack(act, MLXSW_AFA_QOS_DSCP_CMD_SET_ALL, + dscp); + mlxsw_afa_qos_dscp_rw_set(act, MLXSW_AFA_QOS_DSCP_RW_CLEAR); + } + + return 0; +} + +int mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block, + u8 dsfield, + struct netlink_ext_ack *extack) +{ + return __mlxsw_afa_block_append_qos_dsfield(block, + true, dsfield >> 2, + true, dsfield & 0x03, + extack); +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_dsfield); + +int mlxsw_afa_block_append_qos_dscp(struct mlxsw_afa_block *block, + u8 dscp, struct netlink_ext_ack *extack) +{ + return __mlxsw_afa_block_append_qos_dsfield(block, + true, dscp, + false, 0, + extack); +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_dscp); + +int mlxsw_afa_block_append_qos_ecn(struct mlxsw_afa_block *block, + u8 ecn, struct netlink_ext_ack *extack) +{ + return __mlxsw_afa_block_append_qos_dsfield(block, + false, 0, + true, ecn, + extack); +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_ecn); + +int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block, + u8 prio, + struct netlink_ext_ack *extack) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_QOS_CODE, + MLXSW_AFA_QOS_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append QOS action"); + return PTR_ERR(act); + } + mlxsw_afa_qos_switch_prio_pack(act, MLXSW_AFA_QOS_SWITCH_PRIO_CMD_SET, + prio); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_switch_prio); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 0e3a59dda12e..8c2705e16ef7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/netdevice.h> +#include <net/flow_offload.h> struct mlxsw_afa; struct mlxsw_afa_block; @@ -42,7 +43,11 @@ int mlxsw_afa_block_activity_get(struct mlxsw_afa_block *block, bool *activity); int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block); -int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); +const struct flow_action_cookie * +mlxsw_afa_cookie_lookup(struct mlxsw_afa *mlxsw_afa, u32 cookie_index); +int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block, bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, u16 trap_id); @@ -57,6 +62,16 @@ int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, u16 vid, u8 pcp, u8 et, struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block, + u8 prio, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_dsfield(struct mlxsw_afa_block *block, + u8 dsfield, + struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_dscp(struct mlxsw_afa_block *block, + u8 dscp, struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_ecn(struct mlxsw_afa_block *block, + u8 ecn, struct netlink_ext_ack *extack); int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block, u32 counter_index); int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index feb4672a5ac0..9f6905fa6b47 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -9,6 +9,41 @@ #include "item.h" #include "core_acl_flex_keys.h" +/* For the purpose of the driver, define an internal storage scratchpad + * that will be used to store key/mask values. For each defined element type + * define an internal storage geometry. + * + * When adding new elements, MLXSW_AFK_ELEMENT_STORAGE_SIZE must be increased + * accordingly. + */ +static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { + MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16), + MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2), + MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2), + MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_0_31, 0x0C, 4), + MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16), + MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), + MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), + MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), + MLXSW_AFK_ELEMENT_INFO_U32(DST_L4_PORT, 0x14, 0, 16), + MLXSW_AFK_ELEMENT_INFO_U32(SRC_L4_PORT, 0x14, 16, 16), + MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), + MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_8_10, 0x18, 17, 3), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_0_7, 0x18, 20, 8), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_0_31, 0x2C, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_96_127, 0x30, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4), +}; + struct mlxsw_afk { struct list_head key_info_list; unsigned int max_blocks; @@ -26,13 +61,15 @@ static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk) const struct mlxsw_afk_block *block = &mlxsw_afk->blocks[i]; for (j = 0; j < block->instances_count; j++) { + const struct mlxsw_afk_element_info *elinfo; struct mlxsw_afk_element_inst *elinst; elinst = &block->instances[j]; - if (elinst->type != elinst->info->type || + elinfo = &mlxsw_afk_element_infos[elinst->element]; + if (elinst->type != elinfo->type || (!elinst->avoid_size_check && elinst->item.size.bits != - elinst->info->item.size.bits)) + elinfo->item.size.bits)) return false; } } @@ -72,7 +109,7 @@ struct mlxsw_afk_key_info { * is index inside "blocks" */ struct mlxsw_afk_element_usage elusage; - const struct mlxsw_afk_block *blocks[0]; + const struct mlxsw_afk_block *blocks[]; }; static bool @@ -116,7 +153,7 @@ static void mlxsw_afk_picker_count_hits(struct mlxsw_afk *mlxsw_afk, struct mlxsw_afk_element_inst *elinst; elinst = &block->instances[j]; - if (elinst->info->element == element) { + if (elinst->element == element) { __set_bit(element, picker->hits[i].element); picker->hits[i].total++; } @@ -301,7 +338,7 @@ mlxsw_afk_block_elinst_get(const struct mlxsw_afk_block *block, struct mlxsw_afk_element_inst *elinst; elinst = &block->instances[i]; - if (elinst->info->element == element) + if (elinst->element == element) return elinst; } return NULL; @@ -409,9 +446,12 @@ static void mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst, char *output, char *storage, int u32_diff) { - const struct mlxsw_item *storage_item = &elinst->info->item; const struct mlxsw_item *output_item = &elinst->item; + const struct mlxsw_afk_element_info *elinfo; + const struct mlxsw_item *storage_item; + elinfo = &mlxsw_afk_element_infos[elinst->element]; + storage_item = &elinfo->item; if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32) mlxsw_sp_afk_encode_u32(storage_item, output_item, storage, output, u32_diff); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index cb229b55ecc4..a47a17c04c62 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -69,42 +69,10 @@ struct mlxsw_afk_element_info { MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF, \ _element, _offset, 0, _size) -/* For the purpose of the driver, define an internal storage scratchpad - * that will be used to store key/mask values. For each defined element type - * define an internal storage geometry. - */ -static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { - MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16), - MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2), - MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2), - MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_0_31, 0x0C, 4), - MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16), - MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8), - MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), - MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), - MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), - MLXSW_AFK_ELEMENT_INFO_U32(DST_L4_PORT, 0x14, 0, 16), - MLXSW_AFK_ELEMENT_INFO_U32(SRC_L4_PORT, 0x14, 16, 16), - MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), - MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), - MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), - MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_8_10, 0x18, 17, 3), - MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_0_7, 0x18, 20, 8), - MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_0_31, 0x2C, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_96_127, 0x30, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4), - MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4), -}; - #define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40 struct mlxsw_afk_element_inst { /* element instance in actual block */ - const struct mlxsw_afk_element_info *info; + enum mlxsw_afk_element element; enum mlxsw_afk_element_type type; struct mlxsw_item item; /* element geometry in block */ int u32_key_diff; /* in case value needs to be adjusted before write @@ -116,7 +84,7 @@ struct mlxsw_afk_element_inst { /* element instance in actual block */ #define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, \ _shift, _size, _u32_key_diff, _avoid_size_check) \ { \ - .info = &mlxsw_afk_element_infos[MLXSW_AFK_ELEMENT_##_element], \ + .element = MLXSW_AFK_ELEMENT_##_element, \ .type = _type, \ .item = { \ .offset = _offset, \ diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 34566eb62c47..939b692ffc33 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -53,6 +53,7 @@ /** * struct mlxsw_i2c - device private data: + * @cmd: command attributes; * @cmd.mb_size_in: input mailbox size; * @cmd.mb_off_in: input mailbox offset in register space; * @cmd.mb_size_out: output mailbox size; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index e9ded1a6e131..fd0e97de44e7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -575,6 +575,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); + if (rx_info.trap_id == MLXSW_TRAP_ID_DISCARD_INGRESS_ACL || + rx_info.trap_id == MLXSW_TRAP_ID_DISCARD_EGRESS_ACL) { + u32 cookie_index = 0; + + if (mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) + cookie_index = mlxsw_pci_cqe2_user_def_val_orig_pkt_len_get(cqe); + mlxsw_skb_cb(skb)->cookie_index = cookie_index; + } + byte_count = mlxsw_pci_cqe_byte_count_get(cqe); if (mlxsw_pci_cqe_crc_get(cqe_v, cqe)) byte_count -= ETH_FCS_LEN; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 43fa8c85b5d9..32c7cabfb261 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -25,8 +25,6 @@ #define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT 24 #define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000 -#define MLXSW_PCI_SW_RESET 0xF0010 -#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000 #define MLXSW_PCI_SW_RESET_WAIT_MSECS 200 #define MLXSW_PCI_FW_READY 0xA1844 @@ -210,6 +208,11 @@ MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5); MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6); mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12); +/* pci_cqe_user_def_val_orig_pkt_len + * When trap_id is an ACL: User defined value from policy engine action. + */ +MLXSW_ITEM32(pci, cqe2, user_def_val_orig_pkt_len, 0x14, 0, 20); + /* pci_cqe_owner * Ownership bit. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index e05d1d1be2fd..9b39b8e70519 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -621,7 +621,7 @@ static inline void mlxsw_reg_sfn_pack(char *payload) { MLXSW_REG_ZERO(sfn, payload); mlxsw_reg_sfn_swid_set(payload, 0); - mlxsw_reg_sfn_end_set(payload, 1); + mlxsw_reg_sfn_end_set(payload, 0); mlxsw_reg_sfn_num_rec_set(payload, MLXSW_REG_SFN_REC_MAX_COUNT); } @@ -3296,6 +3296,12 @@ MLXSW_ITEM32(reg, qpcr, g, 0x00, 14, 2); */ MLXSW_ITEM32(reg, qpcr, pid, 0x00, 0, 14); +/* reg_qpcr_clear_counter + * Clear counters. + * Access: OP + */ +MLXSW_ITEM32(reg, qpcr, clear_counter, 0x04, 31, 1); + /* reg_qpcr_color_aware * Is the policer aware of colors. * Must be 0 (unaware) for cpu port. @@ -3393,6 +3399,17 @@ enum mlxsw_reg_qpcr_action { */ MLXSW_ITEM32(reg, qpcr, violate_action, 0x18, 0, 4); +/* reg_qpcr_violate_count + * Counts the number of times violate_action happened on this PID. + * Access: RW + */ +MLXSW_ITEM64(reg, qpcr, violate_count, 0x20, 0, 64); + +#define MLXSW_REG_QPCR_LOWEST_CIR 1 +#define MLXSW_REG_QPCR_HIGHEST_CIR (2 * 1000 * 1000 * 1000) /* 2Gpps */ +#define MLXSW_REG_QPCR_LOWEST_CBS 4 +#define MLXSW_REG_QPCR_HIGHEST_CBS 24 + static inline void mlxsw_reg_qpcr_pack(char *payload, u16 pid, enum mlxsw_reg_qpcr_ir_units ir_units, bool bytes, u32 cir, u16 cbs) @@ -5440,15 +5457,29 @@ enum mlxsw_reg_pmtm_module_type { /* Backplane with 4 lanes */ MLXSW_REG_PMTM_MODULE_TYPE_BP_4X, /* QSFP */ - MLXSW_REG_PMTM_MODULE_TYPE_BP_QSFP, + MLXSW_REG_PMTM_MODULE_TYPE_QSFP, /* SFP */ - MLXSW_REG_PMTM_MODULE_TYPE_BP_SFP, + MLXSW_REG_PMTM_MODULE_TYPE_SFP, /* Backplane with single lane */ MLXSW_REG_PMTM_MODULE_TYPE_BP_1X = 4, /* Backplane with two lane */ MLXSW_REG_PMTM_MODULE_TYPE_BP_2X = 8, - /* Chip2Chip */ - MLXSW_REG_PMTM_MODULE_TYPE_C2C = 10, + /* Chip2Chip4x */ + MLXSW_REG_PMTM_MODULE_TYPE_C2C4X = 10, + /* Chip2Chip2x */ + MLXSW_REG_PMTM_MODULE_TYPE_C2C2X, + /* Chip2Chip1x */ + MLXSW_REG_PMTM_MODULE_TYPE_C2C1X, + /* QSFP-DD */ + MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD = 14, + /* OSFP */ + MLXSW_REG_PMTM_MODULE_TYPE_OSFP, + /* SFP-DD */ + MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD, + /* DSFP */ + MLXSW_REG_PMTM_MODULE_TYPE_DSFP, + /* Chip2Chip8x */ + MLXSW_REG_PMTM_MODULE_TYPE_C2C8X, }; /* reg_pmtm_module_type @@ -5506,12 +5537,10 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM, MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST, MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, @@ -5526,9 +5555,11 @@ enum mlxsw_reg_htgt_trap_group { enum mlxsw_reg_htgt_discard_trap_group { MLXSW_REG_HTGT_DISCARD_TRAP_GROUP_BASE = MLXSW_REG_HTGT_TRAP_GROUP_MAX, + MLXSW_REG_HTGT_TRAP_GROUP_SP_DUMMY, MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS, MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS, MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS, }; /* reg_htgt_trap_group diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 6534184cb942..d62496ef299c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -18,6 +18,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_CQE_V1, MLXSW_RES_ID_CQE_V2, MLXSW_RES_ID_COUNTER_POOL_SIZE, + MLXSW_RES_ID_COUNTER_BANK_SIZE, MLXSW_RES_ID_MAX_SPAN, MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, @@ -75,6 +76,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_CQE_V1] = 0x2211, [MLXSW_RES_ID_CQE_V2] = 0x2212, [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410, + [MLXSW_RES_ID_COUNTER_BANK_SIZE] = 0x2411, [MLXSW_RES_ID_MAX_SPAN] = 0x2420, [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443, [MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC] = 0x2449, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 7358b5bc7eb6..24ca8d5bc564 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -43,6 +43,7 @@ #include "spectrum_acl_flex_actions.h" #include "spectrum_span.h" #include "spectrum_ptp.h" +#include "spectrum_trap.h" #include "../mlxfw/mlxfw.h" #define MLXSW_SP1_FWREV_MAJOR 13 @@ -347,19 +348,6 @@ static void mlxsw_sp_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); } -static void mlxsw_sp_status_notify(struct mlxfw_dev *mlxfw_dev, - const char *msg, const char *comp_name, - u32 done_bytes, u32 total_bytes) -{ - struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = - container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; - - devlink_flash_update_status_notify(priv_to_devlink(mlxsw_sp->core), - msg, comp_name, - done_bytes, total_bytes); -} - static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { .component_query = mlxsw_sp_component_query, .fsm_lock = mlxsw_sp_fsm_lock, @@ -370,7 +358,6 @@ static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { .fsm_query_state = mlxsw_sp_fsm_query_state, .fsm_cancel = mlxsw_sp_fsm_cancel, .fsm_release = mlxsw_sp_fsm_release, - .status_notify = mlxsw_sp_status_notify, }; static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, @@ -382,16 +369,15 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, .ops = &mlxsw_sp_mlxfw_dev_ops, .psid = mlxsw_sp->bus_info->psid, .psid_size = strlen(mlxsw_sp->bus_info->psid), + .devlink = priv_to_devlink(mlxsw_sp->core), }, .mlxsw_sp = mlxsw_sp }; int err; mlxsw_core_fw_flash_start(mlxsw_sp->core); - devlink_flash_update_begin_notify(priv_to_devlink(mlxsw_sp->core)); err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware, extack); - devlink_flash_update_end_notify(priv_to_devlink(mlxsw_sp->core)); mlxsw_core_fw_flash_end(mlxsw_sp->core); return err; @@ -1798,6 +1784,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data); case TC_SETUP_QDISC_TBF: return mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_FIFO: + return mlxsw_sp_setup_tc_fifo(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } @@ -2243,6 +2231,15 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_3635_stats[] = { #define MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN \ ARRAY_SIZE(mlxsw_sp_port_hw_rfc_3635_stats) +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_ext_stats[] = { + { + .str = "ecn_marked", + .getter = mlxsw_reg_ppcnt_ecn_marked_get, + }, +}; + +#define MLXSW_SP_PORT_HW_EXT_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_ext_stats) + static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_discard_stats[] = { { .str = "discard_ingress_general", @@ -2352,6 +2349,7 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN + \ MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN + \ MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN + \ + MLXSW_SP_PORT_HW_EXT_STATS_LEN + \ MLXSW_SP_PORT_HW_DISCARD_STATS_LEN + \ (MLXSW_SP_PORT_HW_PRIO_STATS_LEN * \ IEEE_8021QAZ_MAX_TCS) + \ @@ -2413,6 +2411,12 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, p += ETH_GSTRING_LEN; } + for (i = 0; i < MLXSW_SP_PORT_HW_EXT_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_ext_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; i++) { memcpy(p, mlxsw_sp_port_hw_discard_stats[i].str, ETH_GSTRING_LEN); @@ -2474,6 +2478,10 @@ mlxsw_sp_get_hw_stats_by_group(struct mlxsw_sp_port_hw_stats **p_hw_stats, *p_hw_stats = mlxsw_sp_port_hw_rfc_3635_stats; *p_len = MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; break; + case MLXSW_REG_PPCNT_EXT_CNT: + *p_hw_stats = mlxsw_sp_port_hw_ext_stats; + *p_len = MLXSW_SP_PORT_HW_EXT_STATS_LEN; + break; case MLXSW_REG_PPCNT_DISCARD_CNT: *p_hw_stats = mlxsw_sp_port_hw_discard_stats; *p_len = MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; @@ -2543,6 +2551,11 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev, data, data_index); data_index += MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; + /* Extended Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_EXT_CNT, 0, + data, data_index); + data_index += MLXSW_SP_PORT_HW_EXT_STATS_LEN; + /* Discard Counters */ __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_DISCARD_CNT, 0, data, data_index); @@ -2788,27 +2801,6 @@ static u32 mlxsw_sp1_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u8 width, return ptys_proto; } -static u32 -mlxsw_sp1_to_ptys_upper_speed(struct mlxsw_sp *mlxsw_sp, u32 upper_speed) -{ - u32 ptys_proto = 0; - int i; - - for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { - if (mlxsw_sp1_port_link_mode[i].speed <= upper_speed) - ptys_proto |= mlxsw_sp1_port_link_mode[i].mask; - } - return ptys_proto; -} - -static int -mlxsw_sp1_port_speed_base(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u32 *base_speed) -{ - *base_speed = MLXSW_SP_PORT_BASE_SPEED_25G; - return 0; -} - static void mlxsw_sp1_reg_ptys_eth_pack(struct mlxsw_sp *mlxsw_sp, char *payload, u8 local_port, u32 proto_admin, bool autoneg) @@ -2833,8 +2825,6 @@ mlxsw_sp1_port_type_speed_ops = { .from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex, .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp1_to_ptys_speed, - .to_ptys_upper_speed = mlxsw_sp1_to_ptys_upper_speed, - .port_speed_base = mlxsw_sp1_port_speed_base, .reg_ptys_eth_pack = mlxsw_sp1_reg_ptys_eth_pack, .reg_ptys_eth_unpack = mlxsw_sp1_reg_ptys_eth_unpack, }; @@ -3235,51 +3225,6 @@ static u32 mlxsw_sp2_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, return ptys_proto; } -static u32 -mlxsw_sp2_to_ptys_upper_speed(struct mlxsw_sp *mlxsw_sp, u32 upper_speed) -{ - u32 ptys_proto = 0; - int i; - - for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (mlxsw_sp2_port_link_mode[i].speed <= upper_speed) - ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; - } - return ptys_proto; -} - -static int -mlxsw_sp2_port_speed_base(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u32 *base_speed) -{ - char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 eth_proto_cap; - int err; - - /* In Spectrum-2, the speed of 1x can change from port to port, so query - * it from firmware. - */ - mlxsw_reg_ptys_ext_eth_pack(ptys_pl, local_port, 0, false); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) - return err; - mlxsw_reg_ptys_ext_eth_unpack(ptys_pl, ð_proto_cap, NULL, NULL); - - if (eth_proto_cap & - MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR) { - *base_speed = MLXSW_SP_PORT_BASE_SPEED_50G; - return 0; - } - - if (eth_proto_cap & - MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR) { - *base_speed = MLXSW_SP_PORT_BASE_SPEED_25G; - return 0; - } - - return -EIO; -} - static void mlxsw_sp2_reg_ptys_eth_pack(struct mlxsw_sp *mlxsw_sp, char *payload, u8 local_port, u32 proto_admin, @@ -3305,8 +3250,6 @@ mlxsw_sp2_port_type_speed_ops = { .from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex, .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp2_to_ptys_speed, - .to_ptys_upper_speed = mlxsw_sp2_to_ptys_upper_speed, - .port_speed_base = mlxsw_sp2_port_speed_base, .reg_ptys_eth_pack = mlxsw_sp2_reg_ptys_eth_pack, .reg_ptys_eth_unpack = mlxsw_sp2_reg_ptys_eth_unpack, }; @@ -3520,24 +3463,24 @@ static int mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u32 eth_proto_cap, eth_proto_admin, eth_proto_oper; const struct mlxsw_sp_port_type_speed_ops *ops; char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 eth_proto_admin; - u32 upper_speed; - u32 base_speed; int err; ops = mlxsw_sp->port_type_speed_ops; - err = ops->port_speed_base(mlxsw_sp, mlxsw_sp_port->local_port, - &base_speed); + /* Set advertised speeds to supported speeds. */ + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, + 0, false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); if (err) return err; - upper_speed = base_speed * mlxsw_sp_port->mapping.width; - eth_proto_admin = ops->to_ptys_upper_speed(mlxsw_sp, upper_speed); + ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, ð_proto_cap, + ð_proto_admin, ð_proto_oper); ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, - eth_proto_admin, mlxsw_sp_port->link.autoneg); + eth_proto_cap, mlxsw_sp_port->link.autoneg); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); } @@ -4614,6 +4557,7 @@ static const struct mlxsw_listener mlxsw_sp1_listener[] = { static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) { + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); char qpcr_pl[MLXSW_REG_QPCR_LEN]; enum mlxsw_reg_qpcr_ir_units ir_units; int max_cpu_policers; @@ -4636,7 +4580,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR: rate = 128; burst_size = 7; @@ -4649,7 +4592,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: @@ -4677,6 +4619,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) continue; } + __set_bit(i, mlxsw_sp->trap->policers_usage); mlxsw_reg_qpcr_pack(qpcr_pl, i, ir_units, is_bytes, rate, burst_size); err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(qpcr), qpcr_pl); @@ -4729,19 +4672,20 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1: priority = 2; tc = 2; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR: priority = 1; tc = 1; break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR: + priority = 0; + tc = 1; + break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT: priority = MLXSW_REG_HTGT_DEFAULT_PRIORITY; tc = MLXSW_REG_HTGT_DEFAULT_TC; @@ -4805,20 +4749,32 @@ static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_trap *trap; + u64 max_policers; int err; + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_CPU_POLICERS)) + return -EIO; + max_policers = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_CPU_POLICERS); + trap = kzalloc(struct_size(trap, policers_usage, + BITS_TO_LONGS(max_policers)), GFP_KERNEL); + if (!trap) + return -ENOMEM; + trap->max_policers = max_policers; + mlxsw_sp->trap = trap; + err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); if (err) - return err; + goto err_cpu_policers_set; err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); if (err) - return err; + goto err_trap_groups_set; err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener, ARRAY_SIZE(mlxsw_sp_listener)); if (err) - return err; + goto err_traps_register; err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners, mlxsw_sp->listeners_count); @@ -4830,6 +4786,10 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) err_extra_traps_init: mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, ARRAY_SIZE(mlxsw_sp_listener)); +err_traps_register: +err_trap_groups_set: +err_cpu_policers_set: + kfree(trap); return err; } @@ -4839,6 +4799,7 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp->listeners_count); mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, ARRAY_SIZE(mlxsw_sp_listener)); + kfree(mlxsw_sp->trap); } #define MLXSW_SP_LAG_SEED_INIT 0xcafecafe @@ -4935,16 +4896,35 @@ static const struct mlxsw_sp_span_ops mlxsw_sp1_span_ops = { }; #define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38 +#define MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR 50 + +static u32 __mlxsw_sp_span_buffsize_get(int mtu, u32 speed, u32 buffer_factor) +{ + return 3 * mtu + buffer_factor * speed / 1000; +} static u32 mlxsw_sp2_span_buffsize_get(int mtu, u32 speed) { - return 3 * mtu + MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR * speed / 1000; + int factor = MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR; + + return __mlxsw_sp_span_buffsize_get(mtu, speed, factor); } static const struct mlxsw_sp_span_ops mlxsw_sp2_span_ops = { .buffsize_get = mlxsw_sp2_span_buffsize_get, }; +static u32 mlxsw_sp3_span_buffsize_get(int mtu, u32 speed) +{ + int factor = MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR; + + return __mlxsw_sp_span_buffsize_get(mtu, speed, factor); +} + +static const struct mlxsw_sp_span_ops mlxsw_sp3_span_ops = { + .buffsize_get = mlxsw_sp3_span_buffsize_get, +}; + u32 mlxsw_sp_span_buffsize_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed) { u32 buffsize = mlxsw_sp->span_ops->buffsize_get(speed, mtu); @@ -5223,7 +5203,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; - mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; + mlxsw_sp->span_ops = &mlxsw_sp3_span_ops; mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); @@ -5460,8 +5440,13 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_span_register; + err = mlxsw_sp_counter_resources_register(mlxsw_core); + if (err) + goto err_resources_counter_register; + return 0; +err_resources_counter_register: err_resources_span_register: devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); return err; @@ -5479,8 +5464,13 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_span_register; + err = mlxsw_sp_counter_resources_register(mlxsw_core); + if (err) + goto err_resources_counter_register; + return 0; +err_resources_counter_register: err_resources_span_register: devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); return err; @@ -5684,6 +5674,11 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, .trap_group_init = mlxsw_sp_trap_group_init, + .trap_group_set = mlxsw_sp_trap_group_set, + .trap_policer_init = mlxsw_sp_trap_policer_init, + .trap_policer_fini = mlxsw_sp_trap_policer_fini, + .trap_policer_set = mlxsw_sp_trap_policer_set, + .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, @@ -5718,6 +5713,11 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, .trap_group_init = mlxsw_sp_trap_group_init, + .trap_group_set = mlxsw_sp_trap_group_set, + .trap_policer_init = mlxsw_sp_trap_policer_init, + .trap_policer_fini = mlxsw_sp_trap_policer_fini, + .trap_policer_set = mlxsw_sp_trap_policer_set, + .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .params_register = mlxsw_sp2_params_register, @@ -5751,6 +5751,11 @@ static struct mlxsw_driver mlxsw_sp3_driver = { .trap_fini = mlxsw_sp_trap_fini, .trap_action_set = mlxsw_sp_trap_action_set, .trap_group_init = mlxsw_sp_trap_group_init, + .trap_group_set = mlxsw_sp_trap_group_set, + .trap_policer_init = mlxsw_sp_trap_policer_init, + .trap_policer_fini = mlxsw_sp_trap_policer_fini, + .trap_policer_set = mlxsw_sp_trap_policer_set, + .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .params_register = mlxsw_sp2_params_register, @@ -6316,7 +6321,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, return -EINVAL; } if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_find_by_dev(mlxsw_sp, lower_dev)) { + !mlxsw_sp_rif_exists(mlxsw_sp, lower_dev)) { NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); return -EOPNOTSUPP; } @@ -6472,7 +6477,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, return -EINVAL; } if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan_dev)) { + !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) { NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); return -EOPNOTSUPP; } @@ -6549,7 +6554,7 @@ static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev, if (!info->linking) break; if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan_dev)) { + !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) { NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); return -EOPNOTSUPP; } @@ -6609,7 +6614,7 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, if (!info->linking) break; if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev)) { + !mlxsw_sp_rif_exists(mlxsw_sp, br_dev)) { NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a0f1f9dceec5..ca56e72cb4b7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -19,6 +19,7 @@ #include <net/pkt_cls.h> #include <net/red.h> #include <net/vxlan.h> +#include <net/flow_offload.h> #include "port.h" #include "core.h" @@ -32,9 +33,6 @@ #define MLXSW_SP_MID_MAX 7000 -#define MLXSW_SP_PORT_BASE_SPEED_25G 25000 /* Mb/s */ -#define MLXSW_SP_PORT_BASE_SPEED_50G 50000 /* Mb/s */ - #define MLXSW_SP_KVD_LINEAR_SIZE 98304 /* entries */ #define MLXSW_SP_KVD_GRANULARITY 128 @@ -48,6 +46,10 @@ #define MLXSW_SP_RESOURCE_NAME_SPAN "span_agents" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS "counters" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS_FLOW "flow" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF "rif" + enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD = 1, MLXSW_SP_RESOURCE_KVD_LINEAR, @@ -57,6 +59,9 @@ enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, MLXSW_SP_RESOURCE_SPAN, + MLXSW_SP_RESOURCE_COUNTERS, + MLXSW_SP_RESOURCE_COUNTERS_FLOW, + MLXSW_SP_RESOURCE_COUNTERS_RIF, }; struct mlxsw_sp_port; @@ -141,6 +146,7 @@ struct mlxsw_sp_port_type_speed_ops; struct mlxsw_sp_ptp_state; struct mlxsw_sp_ptp_ops; struct mlxsw_sp_span_ops; +struct mlxsw_sp_qdisc_state; struct mlxsw_sp_port_mapping { u8 module; @@ -168,12 +174,9 @@ struct mlxsw_sp { struct notifier_block netdevice_nb; struct mlxsw_sp_ptp_clock *clock; struct mlxsw_sp_ptp_state *ptp_state; - struct mlxsw_sp_counter_pool *counter_pool; - struct { - struct mlxsw_sp_span_entry *entries; - int entries_count; - } span; + struct mlxsw_sp_span *span; + struct mlxsw_sp_trap *trap; const struct mlxsw_fw_rev *req_rev; const char *fw_filename; const struct mlxsw_sp_kvdl_ops *kvdl_ops; @@ -282,8 +285,7 @@ struct mlxsw_sp_port { struct mlxsw_sp_port_sample *sample; struct list_head vlans_list; struct mlxsw_sp_port_vlan *default_vlan; - struct mlxsw_sp_qdisc *root_qdisc; - struct mlxsw_sp_qdisc *tclass_qdiscs; + struct mlxsw_sp_qdisc_state *qdisc; unsigned acl_rule_count; struct mlxsw_sp_acl_block *ing_acl_block; struct mlxsw_sp_acl_block *eg_acl_block; @@ -313,9 +315,6 @@ struct mlxsw_sp_port_type_speed_ops { u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd); u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u8 width, u32 speed); - u32 (*to_ptys_upper_speed)(struct mlxsw_sp *mlxsw_sp, u32 upper_speed); - int (*port_speed_base)(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u32 *base_speed); void (*reg_ptys_eth_pack)(struct mlxsw_sp *mlxsw_sp, char *payload, u8 local_port, u32 proto_admin, bool autoneg); void (*reg_ptys_eth_unpack)(struct mlxsw_sp *mlxsw_sp, char *payload, @@ -468,10 +467,6 @@ int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack); void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, const struct net_device *vxlan_dev); -struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev, - u16 vid, - struct netlink_ext_ack *extack); extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ @@ -556,7 +551,7 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); -bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp, +bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, struct net_device *l3_dev, @@ -571,10 +566,10 @@ void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, struct net_device *dev); -struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev); +bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); +u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); -struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif); int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, enum mlxsw_sp_l3proto ul_proto, const union mlxsw_sp_l3addr *ul_sip, @@ -653,7 +648,9 @@ struct mlxsw_sp_acl_rule_info { struct mlxsw_afk_element_values values; struct mlxsw_afa_block *act_block; u8 action_created:1, - egress_bind_blocker:1; + ingress_bind_blocker:1, + egress_bind_blocker:1, + counter_valid:1; unsigned int counter_index; }; @@ -672,16 +669,20 @@ struct mlxsw_sp_acl_block { struct mlxsw_sp *mlxsw_sp; unsigned int rule_count; unsigned int disable_count; + unsigned int ingress_blocker_rule_count; unsigned int egress_blocker_rule_count; + unsigned int ingress_binding_count; + unsigned int egress_binding_count; struct net *net; }; struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block); -unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block); +unsigned int +mlxsw_sp_acl_block_rule_count(const struct mlxsw_sp_acl_block *block); void mlxsw_sp_acl_block_disable_inc(struct mlxsw_sp_acl_block *block); void mlxsw_sp_acl_block_disable_dec(struct mlxsw_sp_acl_block *block); -bool mlxsw_sp_acl_block_disabled(struct mlxsw_sp_acl_block *block); +bool mlxsw_sp_acl_block_disabled(const struct mlxsw_sp_acl_block *block); struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp, struct net *net); void mlxsw_sp_acl_block_destroy(struct mlxsw_sp_acl_block *block); @@ -694,7 +695,9 @@ int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_port *mlxsw_sp_port, bool ingress); -bool mlxsw_sp_acl_block_is_egress_bound(struct mlxsw_sp_acl_block *block); +bool mlxsw_sp_acl_block_is_egress_bound(const struct mlxsw_sp_acl_block *block); +bool mlxsw_sp_acl_block_is_ingress_bound(const struct mlxsw_sp_acl_block *block); +bool mlxsw_sp_acl_block_is_mixed_bound(const struct mlxsw_sp_acl_block *block); struct mlxsw_sp_acl_ruleset * mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, u32 chain_index, @@ -726,7 +729,10 @@ int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, u16 group_id); int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei); -int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei, + bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack); int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, @@ -741,6 +747,14 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u32 action, u16 vid, u16 proto, u8 prio, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 prio, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_mangle(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + enum flow_action_mangle_base htype, + u32 offset, u32 mask, u32 val, + struct netlink_ext_ack *extack); int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct netlink_ext_ack *extack); @@ -773,10 +787,17 @@ struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule); int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule *rule, - u64 *packets, u64 *bytes, u64 *last_use); + u64 *packets, u64 *bytes, u64 *last_use, + enum flow_action_hw_stats *used_hw_stats); struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp); +static inline const struct flow_action_cookie * +mlxsw_sp_acl_act_cookie_lookup(struct mlxsw_sp *mlxsw_sp, u32 cookie_index) +{ + return mlxsw_afa_cookie_lookup(mlxsw_sp->afa, cookie_index); +} + int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp); @@ -864,6 +885,8 @@ int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_ets_qopt_offload *p); int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_tbf_qopt_offload *p); +int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p); /* spectrum_fid.c */ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); @@ -974,9 +997,6 @@ void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, enum mlxsw_sp_l3proto proto, union mlxsw_sp_l3addr *addr); -u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp); -bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, - u32 tb_id, __be32 addr); int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, struct mlxsw_sp_nve_params *params, struct netlink_ext_ack *extack); @@ -1003,6 +1023,22 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, enum devlink_trap_action action); int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, const struct devlink_trap_group *group); +int mlxsw_sp_trap_group_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer); +int +mlxsw_sp_trap_policer_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); +void mlxsw_sp_trap_policer_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer); +int +mlxsw_sp_trap_policer_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, struct netlink_ext_ack *extack); +int +mlxsw_sp_trap_policer_counter_get(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 *p_drops); static inline struct net *mlxsw_sp_net(struct mlxsw_sp *mlxsw_sp) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c index 09ee0a807747..a9fff8adc75e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c @@ -60,7 +60,7 @@ static const struct mlxsw_sp1_kvdl_part_info mlxsw_sp1_kvdl_parts_info[] = { struct mlxsw_sp1_kvdl_part { struct mlxsw_sp1_kvdl_part_info info; - unsigned long usage[0]; /* Entries */ + unsigned long usage[]; /* Entries */ }; struct mlxsw_sp1_kvdl { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c index 8d14770766b4..3a73d654017f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c @@ -45,7 +45,7 @@ struct mlxsw_sp2_kvdl_part { unsigned int usage_bit_count; unsigned int indexes_per_usage_bit; unsigned int last_allocated_bit; - unsigned long usage[0]; /* Usage bits */ + unsigned long usage[]; /* Usage bits */ }; struct mlxsw_sp2_kvdl { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 3d3cca596116..67ee880a8727 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -58,7 +58,7 @@ struct mlxsw_sp_acl_ruleset { struct mlxsw_sp_acl_ruleset_ht_key ht_key; struct rhashtable rule_ht; unsigned int ref_count; - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; @@ -71,7 +71,7 @@ struct mlxsw_sp_acl_rule { u64 last_used; u64 last_packets; u64 last_bytes; - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; @@ -99,7 +99,8 @@ struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block) return block->mlxsw_sp; } -unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block) +unsigned int +mlxsw_sp_acl_block_rule_count(const struct mlxsw_sp_acl_block *block) { return block ? block->rule_count : 0; } @@ -116,20 +117,24 @@ void mlxsw_sp_acl_block_disable_dec(struct mlxsw_sp_acl_block *block) block->disable_count--; } -bool mlxsw_sp_acl_block_disabled(struct mlxsw_sp_acl_block *block) +bool mlxsw_sp_acl_block_disabled(const struct mlxsw_sp_acl_block *block) { return block->disable_count; } -bool mlxsw_sp_acl_block_is_egress_bound(struct mlxsw_sp_acl_block *block) +bool mlxsw_sp_acl_block_is_egress_bound(const struct mlxsw_sp_acl_block *block) { - struct mlxsw_sp_acl_block_binding *binding; + return block->egress_binding_count; +} - list_for_each_entry(binding, &block->binding_list, list) { - if (!binding->ingress) - return true; - } - return false; +bool mlxsw_sp_acl_block_is_ingress_bound(const struct mlxsw_sp_acl_block *block) +{ + return block->ingress_binding_count; +} + +bool mlxsw_sp_acl_block_is_mixed_bound(const struct mlxsw_sp_acl_block *block) +{ + return block->ingress_binding_count && block->egress_binding_count; } static bool @@ -163,7 +168,8 @@ mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, binding->mlxsw_sp_port, binding->ingress); } -static bool mlxsw_sp_acl_ruleset_block_bound(struct mlxsw_sp_acl_block *block) +static bool +mlxsw_sp_acl_ruleset_block_bound(const struct mlxsw_sp_acl_block *block) { return block->ruleset_zero; } @@ -250,6 +256,11 @@ int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress))) return -EEXIST; + if (ingress && block->ingress_blocker_rule_count) { + NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to ingress because it contains unsupported rules"); + return -EOPNOTSUPP; + } + if (!ingress && block->egress_blocker_rule_count) { NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules"); return -EOPNOTSUPP; @@ -267,6 +278,10 @@ int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, goto err_ruleset_bind; } + if (ingress) + block->ingress_binding_count++; + else + block->egress_binding_count++; list_add(&binding->list, &block->binding_list); return 0; @@ -288,6 +303,11 @@ int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp, list_del(&binding->list); + if (ingress) + block->ingress_binding_count--; + else + block->egress_binding_count--; + if (mlxsw_sp_acl_ruleset_block_bound(block)) mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding); @@ -515,9 +535,13 @@ int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei) return mlxsw_afa_block_terminate(rulei->act_block); } -int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) +int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei, + bool ingress, + const struct flow_action_cookie *fa_cookie, + struct netlink_ext_ack *extack) { - return mlxsw_afa_block_append_drop(rulei->act_block); + return mlxsw_afa_block_append_drop(rulei->act_block, ingress, + fa_cookie, extack); } int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei) @@ -614,12 +638,126 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, } } +int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 prio, struct netlink_ext_ack *extack) +{ + /* Even though both Linux and Spectrum switches support 16 priorities, + * spectrum_qdisc only processes the first eight priomap elements, and + * the DCB and PFC features are tied to 8 priorities as well. Therefore + * bounce attempts to prioritize packets to higher priorities. + */ + if (prio >= IEEE_8021QAZ_MAX_TCS) { + NL_SET_ERR_MSG_MOD(extack, "Only priorities 0..7 are supported"); + return -EINVAL; + } + return mlxsw_afa_block_append_qos_switch_prio(rulei->act_block, prio, + extack); +} + +enum mlxsw_sp_acl_mangle_field { + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD, + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP, + MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN, +}; + +struct mlxsw_sp_acl_mangle_action { + enum flow_action_mangle_base htype; + /* Offset is u32-aligned. */ + u32 offset; + /* Mask bits are unset for the modified field. */ + u32 mask; + /* Shift required to extract the set value. */ + u32 shift; + enum mlxsw_sp_acl_mangle_field field; +}; + +#define MLXSW_SP_ACL_MANGLE_ACTION(_htype, _offset, _mask, _shift, _field) \ + { \ + .htype = _htype, \ + .offset = _offset, \ + .mask = _mask, \ + .shift = _shift, \ + .field = MLXSW_SP_ACL_MANGLE_FIELD_##_field, \ + } + +#define MLXSW_SP_ACL_MANGLE_ACTION_IP4(_offset, _mask, _shift, _field) \ + MLXSW_SP_ACL_MANGLE_ACTION(FLOW_ACT_MANGLE_HDR_TYPE_IP4, \ + _offset, _mask, _shift, _field) + +#define MLXSW_SP_ACL_MANGLE_ACTION_IP6(_offset, _mask, _shift, _field) \ + MLXSW_SP_ACL_MANGLE_ACTION(FLOW_ACT_MANGLE_HDR_TYPE_IP6, \ + _offset, _mask, _shift, _field) + +static struct mlxsw_sp_acl_mangle_action mlxsw_sp_acl_mangle_actions[] = { + MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xff00ffff, 16, IP_DSFIELD), + MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xff03ffff, 18, IP_DSCP), + MLXSW_SP_ACL_MANGLE_ACTION_IP4(0, 0xfffcffff, 16, IP_ECN), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xf00fffff, 20, IP_DSFIELD), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xf03fffff, 22, IP_DSCP), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(0, 0xffcfffff, 20, IP_ECN), +}; + +static int +mlxsw_sp_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_acl_mangle_action *mact, + u32 val, struct netlink_ext_ack *extack) +{ + switch (mact->field) { + case MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD: + return mlxsw_afa_block_append_qos_dsfield(rulei->act_block, + val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP: + return mlxsw_afa_block_append_qos_dscp(rulei->act_block, + val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN: + return mlxsw_afa_block_append_qos_ecn(rulei->act_block, + val, extack); + } + + /* We shouldn't have gotten a match in the first place! */ + WARN_ONCE(1, "Unhandled mangle field"); + return -EINVAL; +} + +int mlxsw_sp_acl_rulei_act_mangle(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + enum flow_action_mangle_base htype, + u32 offset, u32 mask, u32 val, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_acl_mangle_action *mact; + size_t i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_acl_mangle_actions); ++i) { + mact = &mlxsw_sp_acl_mangle_actions[i]; + if (mact->htype == htype && + mact->offset == offset && + mact->mask == mask) { + val >>= mact->shift; + return mlxsw_sp_acl_rulei_act_mangle_field(mlxsw_sp, + rulei, mact, + val, extack); + } + } + + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field"); + return -EINVAL; +} + int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct netlink_ext_ack *extack) { - return mlxsw_afa_block_append_counter(rulei->act_block, - &rulei->counter_index, extack); + int err; + + err = mlxsw_afa_block_append_counter(rulei->act_block, + &rulei->counter_index, extack); + if (err) + return err; + rulei->counter_valid = true; + return 0; } int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, @@ -707,6 +845,7 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, list_add_tail(&rule->list, &mlxsw_sp->acl->rules); mutex_unlock(&mlxsw_sp->acl->rules_lock); block->rule_count++; + block->ingress_blocker_rule_count += rule->rulei->ingress_bind_blocker; block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker; return 0; @@ -726,6 +865,7 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block = ruleset->ht_key.block; block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker; + block->ingress_blocker_rule_count -= rule->rulei->ingress_bind_blocker; ruleset->ht_key.block->rule_count--; mutex_lock(&mlxsw_sp->acl->rules_lock); list_del(&rule->list); @@ -827,20 +967,24 @@ static void mlxsw_sp_acl_rule_activity_update_work(struct work_struct *work) int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule *rule, - u64 *packets, u64 *bytes, u64 *last_use) + u64 *packets, u64 *bytes, u64 *last_use, + enum flow_action_hw_stats *used_hw_stats) { struct mlxsw_sp_acl_rule_info *rulei; - u64 current_packets; - u64 current_bytes; + u64 current_packets = 0; + u64 current_bytes = 0; int err; rulei = mlxsw_sp_acl_rule_rulei(rule); - err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index, - ¤t_packets, ¤t_bytes); - if (err) - return err; - + if (rulei->counter_valid) { + err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index, + ¤t_packets, + ¤t_bytes); + if (err) + return err; + *used_hw_stats = FLOW_ACTION_HW_STATS_IMMEDIATE; + } *packets = current_packets - rule->last_packets; *bytes = current_bytes - rule->last_bytes; *last_use = rule->last_used; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c index 3a2de13fcb68..dbd3bebf11ec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c @@ -13,7 +13,7 @@ struct mlxsw_sp_acl_bf { struct mutex lock; /* Protects Bloom Filter updates. */ unsigned int bank_size; - refcount_t refcnt[0]; + refcount_t refcnt[]; }; /* Bloom filter uses a crc-16 hash over chunks of data which contain 4 key diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index e993159e8e4c..430da69003d8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -224,7 +224,7 @@ struct mlxsw_sp_acl_tcam_vchunk; struct mlxsw_sp_acl_tcam_chunk { struct mlxsw_sp_acl_tcam_vchunk *vchunk; struct mlxsw_sp_acl_tcam_region *region; - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; @@ -243,7 +243,7 @@ struct mlxsw_sp_acl_tcam_vchunk { struct mlxsw_sp_acl_tcam_entry { struct mlxsw_sp_acl_tcam_ventry *ventry; struct mlxsw_sp_acl_tcam_chunk *chunk; - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 5965913565a5..96437992b102 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -20,7 +20,7 @@ struct mlxsw_sp_acl_tcam { struct mutex lock; /* guards vregion list */ struct list_head vregion_list; u32 vregion_rehash_intrvl; /* ms */ - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; @@ -86,7 +86,7 @@ struct mlxsw_sp_acl_tcam_region { char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN]; struct mlxsw_afk_key_info *key_info; struct mlxsw_sp *mlxsw_sp; - unsigned long priv[0]; + unsigned long priv[]; /* priv has to be always the last item */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c index 83c2e1e5f216..7974982533b5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c @@ -3,92 +3,147 @@ #include <linux/kernel.h> #include <linux/bitops.h> +#include <linux/spinlock.h> #include "spectrum_cnt.h" -#define MLXSW_SP_COUNTER_POOL_BANK_SIZE 4096 - struct mlxsw_sp_counter_sub_pool { + u64 size; unsigned int base_index; - unsigned int size; + enum mlxsw_res_id entry_size_res_id; + const char *resource_name; /* devlink resource name */ + u64 resource_id; /* devlink resource id */ unsigned int entry_size; unsigned int bank_count; + atomic_t active_entries_count; }; struct mlxsw_sp_counter_pool { - unsigned int pool_size; + u64 pool_size; unsigned long *usage; /* Usage bitmap */ - struct mlxsw_sp_counter_sub_pool *sub_pools; + spinlock_t counter_pool_lock; /* Protects counter pool allocations */ + atomic_t active_entries_count; + unsigned int sub_pools_count; + struct mlxsw_sp_counter_sub_pool sub_pools[]; }; -static struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { +static const struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { [MLXSW_SP_COUNTER_SUB_POOL_FLOW] = { + .entry_size_res_id = MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, + .resource_name = MLXSW_SP_RESOURCE_NAME_COUNTERS_FLOW, + .resource_id = MLXSW_SP_RESOURCE_COUNTERS_FLOW, .bank_count = 6, }, [MLXSW_SP_COUNTER_SUB_POOL_RIF] = { + .entry_size_res_id = MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, + .resource_name = MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF, + .resource_id = MLXSW_SP_RESOURCE_COUNTERS_RIF, .bank_count = 2, } }; -static int mlxsw_sp_counter_pool_validate(struct mlxsw_sp *mlxsw_sp) +static u64 mlxsw_sp_counter_sub_pool_occ_get(void *priv) +{ + const struct mlxsw_sp_counter_sub_pool *sub_pool = priv; + + return atomic_read(&sub_pool->active_entries_count); +} + +static int mlxsw_sp_counter_sub_pools_init(struct mlxsw_sp *mlxsw_sp) { - unsigned int total_bank_config = 0; - unsigned int pool_size; + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int base_index = 0; + enum mlxsw_res_id res_id; + int err; int i; - pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); - /* Check config is valid, no bank over subscription */ - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) - total_bank_config += mlxsw_sp_counter_sub_pools[i].bank_count; - if (total_bank_config > pool_size / MLXSW_SP_COUNTER_POOL_BANK_SIZE + 1) - return -EINVAL; + for (i = 0; i < pool->sub_pools_count; i++) { + sub_pool = &pool->sub_pools[i]; + res_id = sub_pool->entry_size_res_id; + + if (!mlxsw_core_res_valid(mlxsw_sp->core, res_id)) + return -EIO; + sub_pool->entry_size = mlxsw_core_res_get(mlxsw_sp->core, + res_id); + err = devlink_resource_size_get(devlink, + sub_pool->resource_id, + &sub_pool->size); + if (err) + goto err_resource_size_get; + + devlink_resource_occ_get_register(devlink, + sub_pool->resource_id, + mlxsw_sp_counter_sub_pool_occ_get, + sub_pool); + + sub_pool->base_index = base_index; + base_index += sub_pool->size; + atomic_set(&sub_pool->active_entries_count, 0); + } return 0; + +err_resource_size_get: + for (i--; i >= 0; i--) { + sub_pool = &pool->sub_pools[i]; + + devlink_resource_occ_get_unregister(devlink, + sub_pool->resource_id); + } + return err; } -static int mlxsw_sp_counter_sub_pools_prepare(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_counter_sub_pools_fini(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_counter_sub_pool *sub_pool; + int i; - /* Prepare generic flow pool*/ - sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_FLOW]; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_PACKETS_BYTES)) - return -EIO; - sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, - COUNTER_SIZE_PACKETS_BYTES); - /* Prepare erif pool*/ - sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_RIF]; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_ROUTER_BASIC)) - return -EIO; - sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, - COUNTER_SIZE_ROUTER_BASIC); - return 0; + for (i = 0; i < pool->sub_pools_count; i++) { + sub_pool = &pool->sub_pools[i]; + + WARN_ON(atomic_read(&sub_pool->active_entries_count)); + devlink_resource_occ_get_unregister(devlink, + sub_pool->resource_id); + } +} + +static u64 mlxsw_sp_counter_pool_occ_get(void *priv) +{ + const struct mlxsw_sp_counter_pool *pool = priv; + + return atomic_read(&pool->active_entries_count); } int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) { + unsigned int sub_pools_count = ARRAY_SIZE(mlxsw_sp_counter_sub_pools); + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_counter_sub_pool *sub_pool; struct mlxsw_sp_counter_pool *pool; - unsigned int base_index; unsigned int map_size; - int i; int err; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_POOL_SIZE)) - return -EIO; - - err = mlxsw_sp_counter_pool_validate(mlxsw_sp); - if (err) - return err; - - err = mlxsw_sp_counter_sub_pools_prepare(mlxsw_sp); - if (err) - return err; - - pool = kzalloc(sizeof(*pool), GFP_KERNEL); + pool = kzalloc(struct_size(pool, sub_pools, sub_pools_count), + GFP_KERNEL); if (!pool) return -ENOMEM; + mlxsw_sp->counter_pool = pool; + memcpy(pool->sub_pools, mlxsw_sp_counter_sub_pools, + sub_pools_count * sizeof(*sub_pool)); + pool->sub_pools_count = sub_pools_count; + spin_lock_init(&pool->counter_pool_lock); + atomic_set(&pool->active_entries_count, 0); + + err = devlink_resource_size_get(devlink, MLXSW_SP_RESOURCE_COUNTERS, + &pool->pool_size); + if (err) + goto err_pool_resource_size_get; + devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_COUNTERS, + mlxsw_sp_counter_pool_occ_get, pool); - pool->pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); map_size = BITS_TO_LONGS(pool->pool_size) * sizeof(unsigned long); pool->usage = kzalloc(map_size, GFP_KERNEL); @@ -97,26 +152,18 @@ int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) goto err_usage_alloc; } - pool->sub_pools = mlxsw_sp_counter_sub_pools; - /* Allocation is based on bank count which should be - * specified for each sub pool statically. - */ - base_index = 0; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { - sub_pool = &pool->sub_pools[i]; - sub_pool->size = sub_pool->bank_count * - MLXSW_SP_COUNTER_POOL_BANK_SIZE; - sub_pool->base_index = base_index; - base_index += sub_pool->size; - /* The last bank can't be fully used */ - if (sub_pool->base_index + sub_pool->size > pool->pool_size) - sub_pool->size = pool->pool_size - sub_pool->base_index; - } + err = mlxsw_sp_counter_sub_pools_init(mlxsw_sp); + if (err) + goto err_sub_pools_init; - mlxsw_sp->counter_pool = pool; return 0; +err_sub_pools_init: + kfree(pool->usage); err_usage_alloc: + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_COUNTERS); +err_pool_resource_size_get: kfree(pool); return err; } @@ -124,10 +171,15 @@ err_usage_alloc: void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_counter_sub_pools_fini(mlxsw_sp); WARN_ON(find_first_bit(pool->usage, pool->pool_size) != pool->pool_size); + WARN_ON(atomic_read(&pool->active_entries_count)); kfree(pool->usage); + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_COUNTERS); kfree(pool); } @@ -139,25 +191,37 @@ int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_counter_sub_pool *sub_pool; unsigned int entry_index; unsigned int stop_index; - int i; + int i, err; - sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + sub_pool = &pool->sub_pools[sub_pool_id]; stop_index = sub_pool->base_index + sub_pool->size; entry_index = sub_pool->base_index; + spin_lock(&pool->counter_pool_lock); entry_index = find_next_zero_bit(pool->usage, stop_index, entry_index); - if (entry_index == stop_index) - return -ENOBUFS; + if (entry_index == stop_index) { + err = -ENOBUFS; + goto err_alloc; + } /* The sub-pools can contain non-integer number of entries * so we must check for overflow */ - if (entry_index + sub_pool->entry_size > stop_index) - return -ENOBUFS; + if (entry_index + sub_pool->entry_size > stop_index) { + err = -ENOBUFS; + goto err_alloc; + } for (i = 0; i < sub_pool->entry_size; i++) __set_bit(entry_index + i, pool->usage); + spin_unlock(&pool->counter_pool_lock); *p_counter_index = entry_index; + atomic_add(sub_pool->entry_size, &sub_pool->active_entries_count); + atomic_add(sub_pool->entry_size, &pool->active_entries_count); return 0; + +err_alloc: + spin_unlock(&pool->counter_pool_lock); + return err; } void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, @@ -170,7 +234,77 @@ void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(counter_index >= pool->pool_size)) return; - sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + sub_pool = &pool->sub_pools[sub_pool_id]; + spin_lock(&pool->counter_pool_lock); for (i = 0; i < sub_pool->entry_size; i++) __clear_bit(counter_index + i, pool->usage); + spin_unlock(&pool->counter_pool_lock); + atomic_sub(sub_pool->entry_size, &sub_pool->active_entries_count); + atomic_sub(sub_pool->entry_size, &pool->active_entries_count); +} + +int mlxsw_sp_counter_resources_register(struct mlxsw_core *mlxsw_core) +{ + static struct devlink_resource_size_params size_params; + struct devlink *devlink = priv_to_devlink(mlxsw_core); + const struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int total_bank_config; + u64 sub_pool_size; + u64 base_index; + u64 pool_size; + u64 bank_size; + int err; + int i; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_POOL_SIZE) || + !MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_BANK_SIZE)) + return -EIO; + + pool_size = MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_POOL_SIZE); + bank_size = MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_BANK_SIZE); + + devlink_resource_size_params_init(&size_params, pool_size, + pool_size, bank_size, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, + MLXSW_SP_RESOURCE_NAME_COUNTERS, + pool_size, + MLXSW_SP_RESOURCE_COUNTERS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); + if (err) + return err; + + /* Allocation is based on bank count which should be + * specified for each sub pool statically. + */ + total_bank_config = 0; + base_index = 0; + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { + sub_pool = &mlxsw_sp_counter_sub_pools[i]; + sub_pool_size = sub_pool->bank_count * bank_size; + /* The last bank can't be fully used */ + if (base_index + sub_pool_size > pool_size) + sub_pool_size = pool_size - base_index; + base_index += sub_pool_size; + + devlink_resource_size_params_init(&size_params, sub_pool_size, + sub_pool_size, bank_size, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, + sub_pool->resource_name, + sub_pool_size, + sub_pool->resource_id, + MLXSW_SP_RESOURCE_COUNTERS, + &size_params); + if (err) + return err; + total_bank_config += sub_pool->bank_count; + } + + /* Check config is valid, no bank over subscription */ + if (WARN_ON(total_bank_config > div64_u64(pool_size, bank_size) + 1)) + return -EINVAL; + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h index 81465e267b10..a68d931090dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h @@ -4,6 +4,7 @@ #ifndef _MLXSW_SPECTRUM_CNT_H #define _MLXSW_SPECTRUM_CNT_H +#include "core.h" #include "spectrum.h" enum mlxsw_sp_counter_sub_pool_id { @@ -19,5 +20,6 @@ void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index); int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_counter_resources_register(struct mlxsw_core *mlxsw_core); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 2dc0978428e6..daf029931b5f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -2,6 +2,7 @@ /* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ #include <linux/kernel.h> +#include <linux/mutex.h> #include <net/devlink.h> #include "spectrum.h" @@ -210,7 +211,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, return err; rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); i = 0; start_again: err = devlink_dpipe_entry_ctx_prepare(dump_ctx); @@ -241,14 +242,14 @@ start_again: devlink_dpipe_entry_ctx_close(dump_ctx); if (i != rif_count) goto start_again; - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); devlink_dpipe_entry_clear(&entry); return 0; err_entry_append: err_entry_get: err_ctx_prepare: - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); devlink_dpipe_entry_clear(&entry); return err; } @@ -258,7 +259,7 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable) struct mlxsw_sp *mlxsw_sp = priv; int i; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); @@ -271,7 +272,7 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable) mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return 0; } @@ -546,7 +547,7 @@ mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp, int i, j; int err; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); i = 0; rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); start_again: @@ -602,12 +603,12 @@ out: if (i != rif_count) goto start_again; - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return 0; err_ctx_prepare: err_entry_append: - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -662,7 +663,7 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp, { int i; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); struct mlxsw_sp_neigh_entry *neigh_entry; @@ -684,7 +685,7 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp, enable); } } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); } static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable) @@ -701,7 +702,7 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type) u64 size = 0; int i; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); struct mlxsw_sp_neigh_entry *neigh_entry; @@ -721,7 +722,7 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type) size++; } } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return size; } @@ -1093,7 +1094,7 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp, int j; int err; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); start_again: err = devlink_dpipe_entry_ctx_prepare(dump_ctx); @@ -1130,13 +1131,13 @@ skip: devlink_dpipe_entry_ctx_close(dump_ctx); if (nh_count != nh_count_max) goto start_again; - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return 0; err_ctx_prepare: err_entry_append: - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -1206,9 +1207,9 @@ mlxsw_sp_dpipe_table_adj_size_get(void *priv) struct mlxsw_sp *mlxsw_sp = priv; u64 size; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return size; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 8df3cb21baa6..004c42274e48 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -8,6 +8,7 @@ #include <linux/netdevice.h> #include <linux/rhashtable.h> #include <linux/rtnetlink.h> +#include <linux/refcount.h> #include "spectrum.h" #include "reg.h" @@ -24,7 +25,7 @@ struct mlxsw_sp_fid_core { struct mlxsw_sp_fid { struct list_head list; struct mlxsw_sp_rif *rif; - unsigned int ref_count; + refcount_t ref_count; u16 fid_index; struct mlxsw_sp_fid_family *fid_family; struct rhash_head ht_node; @@ -149,7 +150,7 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->fid_ht, &fid_index, mlxsw_sp_fid_ht_params); if (fid) - fid->ref_count++; + refcount_inc(&fid->ref_count); return fid; } @@ -183,7 +184,7 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni, mlxsw_sp_fid_vni_ht_params); if (fid) - fid->ref_count++; + refcount_inc(&fid->ref_count); return fid; } @@ -437,16 +438,6 @@ static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } -static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, - u16 vid, bool valid) -{ - enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VID_TO_FID; - char svfa_pl[MLXSW_REG_SVFA_LEN]; - - mlxsw_reg_svfa_pack(svfa_pl, 0, mt, valid, fid_index, vid); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); -} - static int __mlxsw_sp_fid_port_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, u8 local_port, u16 vid, bool valid) { @@ -457,140 +448,6 @@ static int __mlxsw_sp_fid_port_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); } -static int mlxsw_sp_fid_8021q_configure(struct mlxsw_sp_fid *fid) -{ - struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; - struct mlxsw_sp_fid_8021q *fid_8021q; - int err; - - err = mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, fid->fid_index, true); - if (err) - return err; - - fid_8021q = mlxsw_sp_fid_8021q_fid(fid); - err = mlxsw_sp_fid_vid_map(mlxsw_sp, fid->fid_index, fid_8021q->vid, - true); - if (err) - goto err_fid_map; - - return 0; - -err_fid_map: - mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, false); - return err; -} - -static void mlxsw_sp_fid_8021q_deconfigure(struct mlxsw_sp_fid *fid) -{ - struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; - struct mlxsw_sp_fid_8021q *fid_8021q; - - fid_8021q = mlxsw_sp_fid_8021q_fid(fid); - mlxsw_sp_fid_vid_map(mlxsw_sp, fid->fid_index, fid_8021q->vid, false); - mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, false); -} - -static int mlxsw_sp_fid_8021q_index_alloc(struct mlxsw_sp_fid *fid, - const void *arg, u16 *p_fid_index) -{ - struct mlxsw_sp_fid_family *fid_family = fid->fid_family; - u16 vid = *(u16 *) arg; - - /* Use 1:1 mapping for simplicity although not a must */ - if (vid < fid_family->start_index || vid > fid_family->end_index) - return -EINVAL; - *p_fid_index = vid; - - return 0; -} - -static bool -mlxsw_sp_fid_8021q_compare(const struct mlxsw_sp_fid *fid, const void *arg) -{ - u16 vid = *(u16 *) arg; - - return mlxsw_sp_fid_8021q_fid(fid)->vid == vid; -} - -static u16 mlxsw_sp_fid_8021q_flood_index(const struct mlxsw_sp_fid *fid) -{ - return fid->fid_index; -} - -static int mlxsw_sp_fid_8021q_port_vid_map(struct mlxsw_sp_fid *fid, - struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 local_port = mlxsw_sp_port->local_port; - - /* In case there are no {Port, VID} => FID mappings on the port, - * we can use the global VID => FID mapping we created when the - * FID was configured. - */ - if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 0) - return 0; - return __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, local_port, - vid, true); -} - -static void -mlxsw_sp_fid_8021q_port_vid_unmap(struct mlxsw_sp_fid *fid, - struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 local_port = mlxsw_sp_port->local_port; - - if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 0) - return; - __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, local_port, vid, - false); -} - -static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021q_ops = { - .setup = mlxsw_sp_fid_8021q_setup, - .configure = mlxsw_sp_fid_8021q_configure, - .deconfigure = mlxsw_sp_fid_8021q_deconfigure, - .index_alloc = mlxsw_sp_fid_8021q_index_alloc, - .compare = mlxsw_sp_fid_8021q_compare, - .flood_index = mlxsw_sp_fid_8021q_flood_index, - .port_vid_map = mlxsw_sp_fid_8021q_port_vid_map, - .port_vid_unmap = mlxsw_sp_fid_8021q_port_vid_unmap, -}; - -static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021q_flood_tables[] = { - { - .packet_type = MLXSW_SP_FLOOD_TYPE_UC, - .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, - .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, - .table_index = 0, - }, - { - .packet_type = MLXSW_SP_FLOOD_TYPE_MC, - .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, - .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, - .table_index = 1, - }, - { - .packet_type = MLXSW_SP_FLOOD_TYPE_BC, - .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, - .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET, - .table_index = 2, - }, -}; - -/* Range and flood configuration must match mlxsw_config_profile */ -static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021q_family = { - .type = MLXSW_SP_FID_TYPE_8021Q, - .fid_size = sizeof(struct mlxsw_sp_fid_8021q), - .start_index = 1, - .end_index = VLAN_VID_MASK, - .flood_tables = mlxsw_sp_fid_8021q_flood_tables, - .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021q_flood_tables), - .rif_type = MLXSW_SP_RIF_TYPE_VLAN, - .ops = &mlxsw_sp_fid_8021q_ops, -}; - static struct mlxsw_sp_fid_8021d * mlxsw_sp_fid_8021d_fid(const struct mlxsw_sp_fid *fid) { @@ -845,6 +702,14 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021d_family = { .lag_vid_valid = 1, }; +static bool +mlxsw_sp_fid_8021q_compare(const struct mlxsw_sp_fid *fid, const void *arg) +{ + u16 vid = *(u16 *) arg; + + return mlxsw_sp_fid_8021q_fid(fid)->vid == vid; +} + static void mlxsw_sp_fid_8021q_fdb_clear_offload(const struct mlxsw_sp_fid *fid, const struct net_device *nve_dev) @@ -1030,7 +895,7 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp, list_for_each_entry(fid, &fid_family->fids_list, list) { if (!fid->fid_family->ops->compare(fid, arg)) continue; - fid->ref_count++; + refcount_inc(&fid->ref_count); return fid; } @@ -1075,7 +940,7 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, goto err_rhashtable_insert; list_add(&fid->list, &fid_family->fids_list); - fid->ref_count++; + refcount_set(&fid->ref_count, 1); return fid; err_rhashtable_insert: @@ -1093,7 +958,7 @@ void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid) struct mlxsw_sp_fid_family *fid_family = fid->fid_family; struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; - if (--fid->ref_count != 0) + if (!refcount_dec_and_test(&fid->ref_count)) return; list_del(&fid->list); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index b607919c8ad0..2f76908cae73 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -26,11 +26,20 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (!flow_action_has_entries(flow_action)) return 0; + if (!flow_action_mixed_hw_stats_check(flow_action, extack)) + return -EOPNOTSUPP; - /* Count action is inserted first */ - err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack); - if (err) - return err; + act = flow_action_first_entry_get(flow_action); + if (act->hw_stats == FLOW_ACTION_HW_STATS_ANY || + act->hw_stats == FLOW_ACTION_HW_STATS_IMMEDIATE) { + /* Count action is inserted first */ + err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack); + if (err) + return err; + } else if (act->hw_stats != FLOW_ACTION_HW_STATS_DISABLED) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type"); + return -EOPNOTSUPP; + } flow_action_for_each(i, act, flow_action) { switch (act->id) { @@ -41,12 +50,30 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return err; } break; - case FLOW_ACTION_DROP: - err = mlxsw_sp_acl_rulei_act_drop(rulei); + case FLOW_ACTION_DROP: { + bool ingress; + + if (mlxsw_sp_acl_block_is_mixed_bound(block)) { + NL_SET_ERR_MSG_MOD(extack, "Drop action is not supported when block is bound to ingress and egress"); + return -EOPNOTSUPP; + } + ingress = mlxsw_sp_acl_block_is_ingress_bound(block); + err = mlxsw_sp_acl_rulei_act_drop(rulei, ingress, + act->cookie, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, "Cannot append drop action"); return err; } + + /* Forbid block with this rulei to be bound + * to ingress/egress in future. Ingress rule is + * a blocker for egress and vice versa. + */ + if (ingress) + rulei->egress_bind_blocker = 1; + else + rulei->ingress_bind_blocker = 1; + } break; case FLOW_ACTION_TRAP: err = mlxsw_sp_acl_rulei_act_trap(rulei); @@ -127,6 +154,25 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, act->id, vid, proto, prio, extack); } + case FLOW_ACTION_PRIORITY: + return mlxsw_sp_acl_rulei_act_priority(mlxsw_sp, rulei, + act->priority, + extack); + case FLOW_ACTION_MANGLE: { + enum flow_action_mangle_base htype = act->mangle.htype; + __be32 be_mask = (__force __be32) act->mangle.mask; + __be32 be_val = (__force __be32) act->mangle.val; + u32 offset = act->mangle.offset; + u32 mask = be32_to_cpu(be_mask); + u32 val = be32_to_cpu(be_val); + + err = mlxsw_sp_acl_rulei_act_mangle(mlxsw_sp, rulei, + htype, offset, + mask, val, extack); + if (err) + return err; + break; + } default: NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); @@ -525,6 +571,7 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct flow_cls_offload *f) { + enum flow_action_hw_stats used_hw_stats = FLOW_ACTION_HW_STATS_DISABLED; struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule *rule; u64 packets; @@ -543,11 +590,11 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, return -EINVAL; err = mlxsw_sp_acl_rule_get_stats(mlxsw_sp, rule, &packets, &bytes, - &lastuse); + &lastuse, &used_hw_stats); if (err) goto err_rule_get_stats; - flow_stats_update(&f->stats, bytes, packets, lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse, used_hw_stats); mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index 1e4cdee7bcd7..20d72f1c0cee 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -2,13 +2,15 @@ /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */ #include <linux/kernel.h> +#include <linux/mutex.h> #include <linux/slab.h> #include "spectrum.h" struct mlxsw_sp_kvdl { const struct mlxsw_sp_kvdl_ops *kvdl_ops; - unsigned long priv[0]; + struct mutex kvdl_lock; /* Protects kvdl allocations */ + unsigned long priv[]; /* priv has to be always the last item */ }; @@ -22,6 +24,7 @@ int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp) GFP_KERNEL); if (!kvdl) return -ENOMEM; + mutex_init(&kvdl->kvdl_lock); kvdl->kvdl_ops = kvdl_ops; mlxsw_sp->kvdl = kvdl; @@ -31,6 +34,7 @@ int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp) return 0; err_init: + mutex_destroy(&kvdl->kvdl_lock); kfree(kvdl); return err; } @@ -40,6 +44,7 @@ void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp) struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; kvdl->kvdl_ops->fini(mlxsw_sp, kvdl->priv); + mutex_destroy(&kvdl->kvdl_lock); kfree(kvdl); } @@ -48,9 +53,14 @@ int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, u32 *p_entry_index) { struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; + int err; + + mutex_lock(&kvdl->kvdl_lock); + err = kvdl->kvdl_ops->alloc(mlxsw_sp, kvdl->priv, type, + entry_count, p_entry_index); + mutex_unlock(&kvdl->kvdl_lock); - return kvdl->kvdl_ops->alloc(mlxsw_sp, kvdl->priv, type, - entry_count, p_entry_index); + return err; } void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, @@ -59,8 +69,10 @@ void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; + mutex_lock(&kvdl->kvdl_lock); kvdl->kvdl_ops->free(mlxsw_sp, kvdl->priv, type, entry_count, entry_index); + mutex_unlock(&kvdl->kvdl_lock); } int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 336e5ecc68f8..47eb751a2570 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 /* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */ +#include <linux/mutex.h> #include <linux/rhashtable.h> #include <net/ipv6.h> @@ -12,6 +13,7 @@ struct mlxsw_sp_mr { void *catchall_route_priv; struct delayed_work stats_update_dw; struct list_head table_list; + struct mutex table_list_lock; /* Protects table_list */ #define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */ unsigned long priv[0]; /* priv has to be always the last item */ @@ -66,9 +68,10 @@ struct mlxsw_sp_mr_table { u32 vr_id; struct mlxsw_sp_mr_vif vifs[MAXVIFS]; struct list_head route_list; + struct mutex route_list_lock; /* Protects route_list */ struct rhashtable route_ht; const struct mlxsw_sp_mr_table_ops *ops; - char catchall_route_priv[0]; + char catchall_route_priv[]; /* catchall_route_priv has to be always the last item */ }; @@ -370,11 +373,13 @@ static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route) static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, struct mlxsw_sp_mr_route *mr_route) { + WARN_ON_ONCE(!mutex_is_locked(&mr_table->route_list_lock)); + mlxsw_sp_mr_mfc_offload_set(mr_route, false); - mlxsw_sp_mr_route_erase(mr_table, mr_route); rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, mlxsw_sp_mr_route_ht_params); list_del(&mr_route->node); + mlxsw_sp_mr_route_erase(mr_table, mr_route); mlxsw_sp_mr_route_destroy(mr_table, mr_route); } @@ -415,19 +420,21 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table, goto err_duplicate_route; } + /* Write the route to the hardware */ + err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace); + if (err) + goto err_mr_route_write; + /* Put it in the table data-structures */ + mutex_lock(&mr_table->route_list_lock); list_add_tail(&mr_route->node, &mr_table->route_list); + mutex_unlock(&mr_table->route_list_lock); err = rhashtable_insert_fast(&mr_table->route_ht, &mr_route->ht_node, mlxsw_sp_mr_route_ht_params); if (err) goto err_rhashtable_insert; - /* Write the route to the hardware */ - err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace); - if (err) - goto err_mr_route_write; - /* Destroy the original route */ if (replace) { rhashtable_remove_fast(&mr_table->route_ht, @@ -440,11 +447,12 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table, mlxsw_sp_mr_mfc_offload_update(mr_route); return 0; -err_mr_route_write: - rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, - mlxsw_sp_mr_route_ht_params); err_rhashtable_insert: + mutex_lock(&mr_table->route_list_lock); list_del(&mr_route->node); + mutex_unlock(&mr_table->route_list_lock); + mlxsw_sp_mr_route_erase(mr_table, mr_route); +err_mr_route_write: err_no_orig_route: err_duplicate_route: mlxsw_sp_mr_route_destroy(mr_table, mr_route); @@ -460,8 +468,11 @@ void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, mr_table->ops->key_create(mr_table, &key, mfc); mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key, mlxsw_sp_mr_route_ht_params); - if (mr_route) + if (mr_route) { + mutex_lock(&mr_table->route_list_lock); __mlxsw_sp_mr_route_del(mr_table, mr_route); + mutex_unlock(&mr_table->route_list_lock); + } } /* Should be called after the VIF struct is updated */ @@ -910,6 +921,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, mr_table->proto = proto; mr_table->ops = &mlxsw_sp_mr_table_ops_arr[proto]; INIT_LIST_HEAD(&mr_table->route_list); + mutex_init(&mr_table->route_list_lock); err = rhashtable_init(&mr_table->route_ht, &mlxsw_sp_mr_route_ht_params); @@ -927,12 +939,15 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, &catchall_route_params); if (err) goto err_ops_route_create; + mutex_lock(&mr->table_list_lock); list_add_tail(&mr_table->node, &mr->table_list); + mutex_unlock(&mr->table_list_lock); return mr_table; err_ops_route_create: rhashtable_destroy(&mr_table->route_ht); err_route_rhashtable_init: + mutex_destroy(&mr_table->route_list_lock); kfree(mr_table); return ERR_PTR(err); } @@ -943,10 +958,13 @@ void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table) struct mlxsw_sp_mr *mr = mlxsw_sp->mr; WARN_ON(!mlxsw_sp_mr_table_empty(mr_table)); + mutex_lock(&mr->table_list_lock); list_del(&mr_table->node); + mutex_unlock(&mr->table_list_lock); mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, &mr_table->catchall_route_priv); rhashtable_destroy(&mr_table->route_ht); + mutex_destroy(&mr_table->route_list_lock); kfree(mr_table); } @@ -955,8 +973,10 @@ void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table) struct mlxsw_sp_mr_route *mr_route, *tmp; int i; + mutex_lock(&mr_table->route_list_lock); list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node) __mlxsw_sp_mr_route_del(mr_table, mr_route); + mutex_unlock(&mr_table->route_list_lock); for (i = 0; i < MAXVIFS; i++) { mr_table->vifs[i].dev = NULL; @@ -1000,12 +1020,15 @@ static void mlxsw_sp_mr_stats_update(struct work_struct *work) struct mlxsw_sp_mr_route *mr_route; unsigned long interval; - rtnl_lock(); - list_for_each_entry(mr_table, &mr->table_list, node) + mutex_lock(&mr->table_list_lock); + list_for_each_entry(mr_table, &mr->table_list, node) { + mutex_lock(&mr_table->route_list_lock); list_for_each_entry(mr_route, &mr_table->route_list, node) mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp, mr_route); - rtnl_unlock(); + mutex_unlock(&mr_table->route_list_lock); + } + mutex_unlock(&mr->table_list_lock); interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); @@ -1024,6 +1047,7 @@ int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, mr->mr_ops = mr_ops; mlxsw_sp->mr = mr; INIT_LIST_HEAD(&mr->table_list); + mutex_init(&mr->table_list_lock); err = mr_ops->init(mlxsw_sp, mr->priv); if (err) @@ -1035,6 +1059,7 @@ int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); return 0; err: + mutex_destroy(&mr->table_list_lock); kfree(mr); return err; } @@ -1045,5 +1070,6 @@ void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp) cancel_delayed_work_sync(&mr->stats_update_dw); mr->mr_ops->fini(mlxsw_sp, mr->priv); + mutex_destroy(&mr->table_list_lock); kfree(mr); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index 2153bcc4b585..54d3e7dcd303 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -67,7 +67,7 @@ struct mlxsw_sp_nve_mc_record { struct mlxsw_sp_nve_mc_list *mc_list; const struct mlxsw_sp_nve_mc_record_ops *ops; u32 kvdl_index; - struct mlxsw_sp_nve_mc_entry entries[0]; + struct mlxsw_sp_nve_mc_entry entries[]; }; struct mlxsw_sp_nve_mc_list { @@ -713,27 +713,6 @@ static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); } -u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp) -{ - WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0); - - return mlxsw_sp->nve->tunnel_index; -} - -bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, - u32 tb_id, __be32 addr) -{ - struct mlxsw_sp_nve *nve = mlxsw_sp->nve; - struct mlxsw_sp_nve_config *config = &nve->config; - - if (nve->num_nve_tunnels && - config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 && - config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id) - return true; - - return false; -} - static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nve_config *config) { @@ -744,6 +723,8 @@ static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp, if (nve->num_nve_tunnels++ != 0) return 0; + nve->config = *config; + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, &nve->tunnel_index); if (err) @@ -760,6 +741,7 @@ err_ops_init: mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, nve->tunnel_index); err_kvdl_alloc: + memset(&nve->config, 0, sizeof(nve->config)); nve->num_nve_tunnels--; return err; } @@ -840,8 +822,6 @@ int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, goto err_fid_vni_set; } - nve->config = config; - err = ops->fdb_replay(params->dev, params->vni, extack); if (err) goto err_fdb_replay; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 34f7c3501b08..9650562fc0ef 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -922,6 +922,8 @@ static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, case HWTSTAMP_TX_ONESTEP_SYNC: case HWTSTAMP_TX_ONESTEP_P2P: return -ERANGE; + default: + return -EINVAL; } switch (rx_filter) { @@ -952,6 +954,8 @@ static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, case HWTSTAMP_FILTER_SOME: case HWTSTAMP_FILTER_NTP_ALL: return -ERANGE; + default: + return -EINVAL; } *p_ing_types = ing_types; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 02526c53d4f5..670a43fe2a00 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -20,14 +20,17 @@ enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_PRIO, MLXSW_SP_QDISC_ETS, MLXSW_SP_QDISC_TBF, + MLXSW_SP_QDISC_FIFO, }; +struct mlxsw_sp_qdisc; + struct mlxsw_sp_qdisc_ops { enum mlxsw_sp_qdisc_type type; int (*check_params)(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); - int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port, + int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); int (*destroy)(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc); @@ -64,6 +67,25 @@ struct mlxsw_sp_qdisc { struct mlxsw_sp_qdisc_ops *ops; }; +struct mlxsw_sp_qdisc_state { + struct mlxsw_sp_qdisc root_qdisc; + struct mlxsw_sp_qdisc tclass_qdiscs[IEEE_8021QAZ_MAX_TCS]; + + /* When a PRIO or ETS are added, the invisible FIFOs in their bands are + * created first. When notifications for these FIFOs arrive, it is not + * known what qdisc their parent handle refers to. It could be a + * newly-created PRIO that will replace the currently-offloaded one, or + * it could be e.g. a RED that will be attached below it. + * + * As the notifications start to arrive, use them to note what the + * future parent handle is, and keep track of which child FIFOs were + * seen. Then when the parent is known, retroactively offload those + * FIFOs. + */ + u32 future_handle; + bool future_fifos[IEEE_8021QAZ_MAX_TCS]; +}; + static bool mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle, enum mlxsw_sp_qdisc_type type) @@ -77,36 +99,38 @@ static struct mlxsw_sp_qdisc * mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent, bool root_only) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; int tclass, child_index; if (parent == TC_H_ROOT) - return mlxsw_sp_port->root_qdisc; + return &qdisc_state->root_qdisc; - if (root_only || !mlxsw_sp_port->root_qdisc || - !mlxsw_sp_port->root_qdisc->ops || - TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle || + if (root_only || !qdisc_state || + !qdisc_state->root_qdisc.ops || + TC_H_MAJ(parent) != qdisc_state->root_qdisc.handle || TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS) return NULL; child_index = TC_H_MIN(parent); tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); - return &mlxsw_sp_port->tclass_qdiscs[tclass]; + return &qdisc_state->tclass_qdiscs[tclass]; } static struct mlxsw_sp_qdisc * mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; int i; - if (mlxsw_sp_port->root_qdisc->handle == handle) - return mlxsw_sp_port->root_qdisc; + if (qdisc_state->root_qdisc.handle == handle) + return &qdisc_state->root_qdisc; - if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC) + if (qdisc_state->root_qdisc.handle == TC_H_UNSPEC) return NULL; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle) - return &mlxsw_sp_port->tclass_qdiscs[i]; + if (qdisc_state->tclass_qdiscs[i].handle == handle) + return &qdisc_state->tclass_qdiscs[i]; return NULL; } @@ -147,11 +171,15 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, if (err) goto err_bad_param; - err = ops->replace(mlxsw_sp_port, mlxsw_sp_qdisc, params); + err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params); if (err) goto err_config; - if (mlxsw_sp_qdisc->handle != handle) { + /* Check if the Qdisc changed. That includes a situation where an + * invisible Qdisc replaces another one, or is being added for the + * first time. + */ + if (mlxsw_sp_qdisc->handle != handle || handle == TC_H_UNSPEC) { mlxsw_sp_qdisc->ops = ops; if (ops->clean_stats) ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc); @@ -295,7 +323,7 @@ mlxsw_sp_qdisc_get_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, - u32 probability, bool is_ecn) + u32 probability, bool is_wred, bool is_ecn) { char cwtpm_cmd[MLXSW_REG_CWTPM_LEN]; char cwtp_cmd[MLXSW_REG_CWTP_LEN]; @@ -313,7 +341,7 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, return err; mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num, - MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn); + MLXSW_REG_CWTP_DEFAULT_PROFILE, is_wred, is_ecn); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); } @@ -347,7 +375,6 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_qdisc->prio_bitmap, &stats_base->tx_packets, &stats_base->tx_bytes); - red_base->prob_mark = xstats->ecn; red_base->prob_drop = xstats->wred_drop[tclass_num]; red_base->pdrop = mlxsw_sp_xstats_tail_drop(xstats, tclass_num); @@ -361,7 +388,8 @@ static int mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc; + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; if (root_qdisc != mlxsw_sp_qdisc) root_qdisc->stats_base.backlog -= @@ -400,7 +428,7 @@ mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, +mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { @@ -417,8 +445,9 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, prob = DIV_ROUND_UP(prob, 1 << 16); min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min); max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max); - return mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, min, - max, prob, p->is_ecn); + return mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, + min, max, prob, + !p->is_nodrop, p->is_ecn); } static void @@ -453,22 +482,19 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_port_xstats *xstats; struct red_stats *res = xstats_ptr; - int early_drops, marks, pdrops; + int early_drops, pdrops; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; - marks = xstats->ecn - xstats_base->prob_mark; pdrops = mlxsw_sp_xstats_tail_drop(xstats, tclass_num) - xstats_base->pdrop; res->pdrop += pdrops; res->prob_drop += early_drops; - res->prob_mark += marks; xstats_base->pdrop += pdrops; xstats_base->prob_drop += early_drops; - xstats_base->prob_mark += marks; return 0; } @@ -486,8 +512,7 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, stats_base = &mlxsw_sp_qdisc->stats_base; mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr); - overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - - stats_base->overlimits; + overlimits = xstats->wred_drop[tclass_num] - stats_base->overlimits; stats_ptr->qstats->overlimits += overlimits; stats_base->overlimits += overlimits; @@ -564,7 +589,8 @@ static int mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc; + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; if (root_qdisc != mlxsw_sp_qdisc) root_qdisc->stats_base.backlog -= @@ -651,7 +677,7 @@ mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, +mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { @@ -740,8 +766,121 @@ int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, } static int +mlxsw_sp_qdisc_fifo_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; + + if (root_qdisc != mlxsw_sp_qdisc) + root_qdisc->stats_base.backlog -= + mlxsw_sp_qdisc->stats_base.backlog; + return 0; +} + +static int +mlxsw_sp_qdisc_fifo_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + return 0; +} + +static int +mlxsw_sp_qdisc_fifo_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + return 0; +} + +static int +mlxsw_sp_qdisc_get_fifo_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + stats_ptr); + return 0; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_fifo = { + .type = MLXSW_SP_QDISC_FIFO, + .check_params = mlxsw_sp_qdisc_fifo_check_params, + .replace = mlxsw_sp_qdisc_fifo_replace, + .destroy = mlxsw_sp_qdisc_fifo_destroy, + .get_stats = mlxsw_sp_qdisc_get_fifo_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, +}; + +int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p) +{ + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + int tclass, child_index; + u32 parent_handle; + + /* Invisible FIFOs are tracked in future_handle and future_fifos. Make + * sure that not more than one qdisc is created for a port at a time. + * RTNL is a simple proxy for that. + */ + ASSERT_RTNL(); + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + if (!mlxsw_sp_qdisc && p->handle == TC_H_UNSPEC) { + parent_handle = TC_H_MAJ(p->parent); + if (parent_handle != qdisc_state->future_handle) { + /* This notifications is for a different Qdisc than + * previously. Wipe the future cache. + */ + memset(qdisc_state->future_fifos, 0, + sizeof(qdisc_state->future_fifos)); + qdisc_state->future_handle = parent_handle; + } + + child_index = TC_H_MIN(p->parent); + tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); + if (tclass < IEEE_8021QAZ_MAX_TCS) { + if (p->command == TC_FIFO_REPLACE) + qdisc_state->future_fifos[tclass] = true; + else if (p->command == TC_FIFO_DESTROY) + qdisc_state->future_fifos[tclass] = false; + } + } + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_FIFO_REPLACE) { + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_fifo, NULL); + } + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_FIFO)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_FIFO_DESTROY: + if (p->handle == mlxsw_sp_qdisc->handle) + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, + mlxsw_sp_qdisc); + return 0; + case TC_FIFO_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_FIFO_REPLACE: /* Handled above. */ + break; + } + + return -EOPNOTSUPP; +} + +static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; int i; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { @@ -751,8 +890,8 @@ __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, false, 0); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - &mlxsw_sp_port->tclass_qdiscs[i]); - mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; + &qdisc_state->tclass_qdiscs[i]); + qdisc_state->tclass_qdiscs[i].prio_bitmap = 0; } return 0; @@ -785,12 +924,13 @@ mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, } static int -__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, +__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, unsigned int nbands, const unsigned int *quanta, const unsigned int *weights, const u8 *priomap) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; struct mlxsw_sp_qdisc *child_qdisc; int tclass, i, band, backlog; u8 old_priomap; @@ -798,7 +938,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, for (band = 0; band < nbands; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); - child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; + child_qdisc = &qdisc_state->tclass_qdiscs[tclass]; old_priomap = child_qdisc->prio_bitmap; child_qdisc->prio_bitmap = 0; @@ -827,28 +967,41 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, child_qdisc); child_qdisc->stats_base.backlog = backlog; } + + if (handle == qdisc_state->future_handle && + qdisc_state->future_fifos[tclass]) { + err = mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC, + child_qdisc, + &mlxsw_sp_qdisc_ops_fifo, + NULL); + if (err) + return err; + } } for (; band < IEEE_8021QAZ_MAX_TCS; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); - child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; + child_qdisc = &qdisc_state->tclass_qdiscs[tclass]; child_qdisc->prio_bitmap = 0; mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, tclass, 0, false, 0); } + + qdisc_state->future_handle = TC_H_UNSPEC; + memset(qdisc_state->future_fifos, 0, sizeof(qdisc_state->future_fifos)); return 0; } static int -mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, +mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_prio_qopt_offload_params *p = params; unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0}; - return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands, zeroes, zeroes, p->priomap); } @@ -880,6 +1033,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; struct mlxsw_sp_qdisc *tc_qdisc; u64 tx_packets = 0; u64 tx_bytes = 0; @@ -888,7 +1042,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, int i; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - tc_qdisc = &mlxsw_sp_port->tclass_qdiscs[i]; + tc_qdisc = &qdisc_state->tclass_qdiscs[i]; mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc, &tx_bytes, &tx_packets, &drops, &backlog); @@ -946,13 +1100,13 @@ mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, +mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_ets_qopt_offload_replace_params *p = params; - return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands, p->quanta, p->weights, p->priomap); } @@ -1014,11 +1168,12 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u8 band, u32 child_handle) { + struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); struct mlxsw_sp_qdisc *old_qdisc; if (band < IEEE_8021QAZ_MAX_TCS && - mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == child_handle) + qdisc_state->tclass_qdiscs[tclass_num].handle == child_handle) return 0; if (!child_handle) { @@ -1037,7 +1192,7 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - &mlxsw_sp_port->tclass_qdiscs[tclass_num]); + &qdisc_state->tclass_qdiscs[tclass_num]); return -EOPNOTSUPP; } @@ -1119,37 +1274,23 @@ int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + struct mlxsw_sp_qdisc_state *qdisc_state; int i; - mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL); - if (!mlxsw_sp_qdisc) - goto err_root_qdisc_init; - - mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc; - mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff; - mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; + qdisc_state = kzalloc(sizeof(*qdisc_state), GFP_KERNEL); + if (!qdisc_state) + return -ENOMEM; - mlxsw_sp_qdisc = kcalloc(IEEE_8021QAZ_MAX_TCS, - sizeof(*mlxsw_sp_qdisc), - GFP_KERNEL); - if (!mlxsw_sp_qdisc) - goto err_tclass_qdiscs_init; - - mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc; + qdisc_state->root_qdisc.prio_bitmap = 0xff; + qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i; + qdisc_state->tclass_qdiscs[i].tclass_num = i; + mlxsw_sp_port->qdisc = qdisc_state; return 0; - -err_tclass_qdiscs_init: - kfree(mlxsw_sp_port->root_qdisc); -err_root_qdisc_init: - return -ENOMEM; } void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port) { - kfree(mlxsw_sp_port->tclass_qdiscs); - kfree(mlxsw_sp_port->root_qdisc); + kfree(mlxsw_sp_port->qdisc); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4a77b511ead2..d5bca1be3ef5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -17,6 +17,7 @@ #include <linux/refcount.h> #include <linux/jhash.h> #include <linux/net_namespace.h> +#include <linux/mutex.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> @@ -48,39 +49,6 @@ struct mlxsw_sp_vr; struct mlxsw_sp_lpm_tree; struct mlxsw_sp_rif_ops; -struct mlxsw_sp_router { - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_rif **rifs; - struct mlxsw_sp_vr *vrs; - struct rhashtable neigh_ht; - struct rhashtable nexthop_group_ht; - struct rhashtable nexthop_ht; - struct list_head nexthop_list; - struct { - /* One tree for each protocol: IPv4 and IPv6 */ - struct mlxsw_sp_lpm_tree *proto_trees[2]; - struct mlxsw_sp_lpm_tree *trees; - unsigned int tree_count; - } lpm; - struct { - struct delayed_work dw; - unsigned long interval; /* ms */ - } neighs_update; - struct delayed_work nexthop_probe_dw; -#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ - struct list_head nexthop_neighs_list; - struct list_head ipip_list; - bool aborted; - struct notifier_block fib_nb; - struct notifier_block netevent_nb; - struct notifier_block inetaddr_nb; - struct notifier_block inet6addr_nb; - const struct mlxsw_sp_rif_ops **rif_ops_arr; - const struct mlxsw_sp_ipip_ops **ipip_ops_arr; - u32 adj_discard_index; - bool adj_discard_index_valid; -}; - struct mlxsw_sp_rif { struct list_head nexthop_list; struct list_head neigh_list; @@ -145,6 +113,9 @@ struct mlxsw_sp_rif_ops { void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac); }; +static struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree); static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, @@ -760,13 +731,18 @@ int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, u16 *vr_id) { struct mlxsw_sp_vr *vr; + int err = 0; + mutex_lock(&mlxsw_sp->router->lock); vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); - if (!vr) - return -ESRCH; + if (!vr) { + err = -ESRCH; + goto out; + } *vr_id = vr->id; - - return 0; +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; } static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, @@ -988,17 +964,23 @@ __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev) struct ip_tunnel *tun = netdev_priv(ol_dev); struct net *net = dev_net(ol_dev); - return __dev_get_by_index(net, tun->parms.link); + return dev_get_by_index_rcu(net, tun->parms.link); } u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) { - struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + struct net_device *d; + u32 tb_id; + rcu_read_lock(); + d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); if (d) - return l3mdev_fib_table(d) ? : RT_TABLE_MAIN; + tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN; else - return RT_TABLE_MAIN; + tb_id = RT_TABLE_MAIN; + rcu_read_unlock(); + + return tb_id; } static struct mlxsw_sp_rif * @@ -1230,7 +1212,7 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, saddr_len = 4; saddr_prefix_len = 32; break; - case MLXSW_SP_L3_PROTO_IPV6: + default: WARN_ON(1); return NULL; } @@ -1355,8 +1337,12 @@ mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, ipip_list_node); list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list, ipip_list_node) { - struct net_device *ipip_ul_dev = - __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + struct net_device *ol_dev = ipip_entry->ol_dev; + struct net_device *ipip_ul_dev; + + rcu_read_lock(); + ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + rcu_read_unlock(); if (ipip_ul_dev == ul_dev) return ipip_entry; @@ -1365,10 +1351,16 @@ mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, return NULL; } -bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp, +bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { - return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL); + bool is_ipip_ul; + + mutex_lock(&mlxsw_sp->router->lock); + is_ipip_ul = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL); + mutex_unlock(&mlxsw_sp->router->lock); + + return is_ipip_ul; } static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, @@ -1388,9 +1380,9 @@ static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev) { + enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX; struct mlxsw_sp_ipip_entry *ipip_entry; enum mlxsw_sp_l3proto ul_proto; - enum mlxsw_sp_ipip_type ipipt; union mlxsw_sp_l3addr saddr; u32 ul_tb_id; @@ -1543,13 +1535,17 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif); /** - * Update the offload related to an IPIP entry. This always updates decap, and - * in addition to that it also: - * @recreate_loopback: recreates the associated loopback RIF - * @keep_encap: updates next hops that use the tunnel netdevice. This is only + * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry. + * @mlxsw_sp: mlxsw_sp. + * @ipip_entry: IPIP entry. + * @recreate_loopback: Recreates the associated loopback RIF. + * @keep_encap: Updates next hops that use the tunnel netdevice. This is only * relevant when recreate_loopback is true. - * @update_nexthops: updates next hops, keeping the current loopback RIF. This + * @update_nexthops: Updates next hops, keeping the current loopback RIF. This * is only relevant when recreate_loopback is false. + * @extack: extack. + * + * Return: Non-zero value on failure. */ int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, @@ -1722,9 +1718,12 @@ static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp, list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list, ipip_list_node) { - struct net_device *ipip_ul_dev = - __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + struct net_device *ol_dev = ipip_entry->ol_dev; + struct net_device *ipip_ul_dev; + rcu_read_lock(); + ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + rcu_read_unlock(); if (ipip_ul_dev == ul_dev) mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); } @@ -1737,35 +1736,41 @@ int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, { struct netdev_notifier_changeupper_info *chup; struct netlink_ext_ack *extack; + int err = 0; + mutex_lock(&mlxsw_sp->router->lock); switch (event) { case NETDEV_REGISTER: - return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); + err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); + break; case NETDEV_UNREGISTER: mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev); - return 0; + break; case NETDEV_UP: mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev); - return 0; + break; case NETDEV_DOWN: mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev); - return 0; + break; case NETDEV_CHANGEUPPER: chup = container_of(info, typeof(*chup), info); extack = info->extack; if (netif_is_l3_master(chup->upper_dev)) - return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp, - ol_dev, - extack); - return 0; + err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp, + ol_dev, + extack); + break; case NETDEV_CHANGE: extack = info->extack; - return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp, - ol_dev, extack); + err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp, + ol_dev, extack); + break; case NETDEV_CHANGEMTU: - return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev); + err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev); + break; } - return 0; + mutex_unlock(&mlxsw_sp->router->lock); + return err; } static int @@ -1809,8 +1814,9 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, struct netdev_notifier_info *info) { struct mlxsw_sp_ipip_entry *ipip_entry = NULL; - int err; + int err = 0; + mutex_lock(&mlxsw_sp->router->lock); while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, ul_dev, ipip_entry))) { @@ -1823,7 +1829,7 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, if (err) { mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp, ul_dev); - return err; + break; } if (demote_this) { @@ -1840,8 +1846,9 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, ipip_entry = prev; } } + mutex_unlock(&mlxsw_sp->router->lock); - return 0; + return err; } int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, @@ -1850,8 +1857,22 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, u32 tunnel_index) { enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + struct mlxsw_sp_router *router = mlxsw_sp->router; struct mlxsw_sp_fib_entry *fib_entry; - int err; + int err = 0; + + mutex_lock(&mlxsw_sp->router->lock); + + if (WARN_ON_ONCE(router->nve_decap_config.valid)) { + err = -EINVAL; + goto out; + } + + router->nve_decap_config.ul_tb_id = ul_tb_id; + router->nve_decap_config.tunnel_index = tunnel_index; + router->nve_decap_config.ul_proto = ul_proto; + router->nve_decap_config.ul_sip = *ul_sip; + router->nve_decap_config.valid = true; /* It is valid to create a tunnel with a local IP and only later * assign this IP address to a local interface @@ -1860,7 +1881,7 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, ul_proto, ul_sip, type); if (!fib_entry) - return 0; + goto out; fib_entry->decap.tunnel_index = tunnel_index; fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; @@ -1869,11 +1890,13 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, if (err) goto err_fib_entry_update; - return 0; + goto out; err_fib_entry_update: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +out: + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -1882,16 +1905,40 @@ void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, const union mlxsw_sp_l3addr *ul_sip) { enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + struct mlxsw_sp_router *router = mlxsw_sp->router; struct mlxsw_sp_fib_entry *fib_entry; + mutex_lock(&mlxsw_sp->router->lock); + + if (WARN_ON_ONCE(!router->nve_decap_config.valid)) + goto out; + + router->nve_decap_config.valid = false; + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, ul_proto, ul_sip, type); if (!fib_entry) - return; + goto out; fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +out: + mutex_unlock(&mlxsw_sp->router->lock); +} + +static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp, + u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + + return router->nve_decap_config.valid && + router->nve_decap_config.ul_tb_id == ul_tb_id && + router->nve_decap_config.ul_proto == ul_proto && + !memcmp(&router->nve_decap_config.ul_sip, ul_sip, + sizeof(*ul_sip)); } struct mlxsw_sp_neigh_key { @@ -2264,10 +2311,8 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, int i, num_rec; int err; - /* Make sure the neighbour's netdev isn't removed in the - * process. - */ - rtnl_lock(); + /* Ensure the RIF we read from the device does not change mid-dump. */ + mutex_lock(&mlxsw_sp->router->lock); do { mlxsw_reg_rauhtd_pack(rauhtd_pl, type); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), @@ -2281,7 +2326,7 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, i); } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -2312,15 +2357,14 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_neigh_entry *neigh_entry; - /* Take RTNL mutex here to prevent lists from changes */ - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list, nexthop_neighs_list_node) /* If this neigh have nexthops, make the kernel think this neigh * is active regardless of the traffic. */ neigh_event_send(neigh_entry->key.n, NULL); - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); } static void @@ -2360,15 +2404,13 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) * the nexthop wouldn't get offloaded until the neighbor is resolved * but it wouldn't get resolved ever in case traffic is flowing in HW * using different nexthop. - * - * Take RTNL mutex here to prevent lists from changes. */ - rtnl_lock(); + mutex_lock(&router->lock); list_for_each_entry(neigh_entry, &router->nexthop_neighs_list, nexthop_neighs_list_node) if (!neigh_entry->connected) neigh_event_send(neigh_entry->key.n, NULL); - rtnl_unlock(); + mutex_unlock(&router->lock); mlxsw_core_schedule_dw(&router->nexthop_probe_dw, MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL); @@ -2506,7 +2548,7 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) dead = n->dead; read_unlock_bh(&n->lock); - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); entry_connected = nud_state & NUD_VALID && !dead; @@ -2528,7 +2570,7 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); out: - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); neigh_release(n); kfree(net_work); } @@ -3189,7 +3231,6 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index = nh_grp->adj_index; /* base */ struct mlxsw_sp_nexthop *nh; int i; - int err; for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; @@ -3200,6 +3241,8 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, } if (nh->update || reallocate) { + int err = 0; + switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: err = mlxsw_sp_nexthop_update @@ -3711,9 +3754,15 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) { - struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + struct net_device *ul_dev; + bool is_up; + + rcu_read_lock(); + ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true; + rcu_read_unlock(); - return ul_dev ? (ul_dev->flags & IFF_UP) : true; + return is_up; } static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, @@ -3840,10 +3889,14 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, if (!dev) return 0; - in_dev = __in_dev_get_rtnl(dev); + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) + fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) { + rcu_read_unlock(); return 0; + } + rcu_read_unlock(); err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); if (err) @@ -4473,6 +4526,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, { struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + struct mlxsw_sp_router *router = mlxsw_sp->router; u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; @@ -4487,12 +4541,13 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry, ipip_entry); } - if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id, - dip.addr4)) { - u32 t_index; + if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id, + MLXSW_SP_L3_PROTO_IPV4, + &dip)) { + u32 tunnel_index; - t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp); - fib_entry->decap.tunnel_index = t_index; + tunnel_index = router->nve_decap_config.tunnel_index; + fib_entry->decap.tunnel_index = tunnel_index; fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; return 0; } @@ -5923,8 +5978,7 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; - /* Protect internal structures from changes */ - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { @@ -5946,7 +6000,7 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); break; } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); kfree(fib_work); } @@ -5957,7 +6011,7 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { @@ -5984,7 +6038,7 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); kfree(fib_work); } @@ -5997,6 +6051,7 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) int err; rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_ADD: @@ -6025,6 +6080,7 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) dev_put(fib_work->ven_info.dev); break; } + mutex_unlock(&mlxsw_sp->router->lock); rtnl_unlock(); kfree(fib_work); } @@ -6233,7 +6289,7 @@ err_fib_event: return NOTIFY_BAD; } -struct mlxsw_sp_rif * +static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { @@ -6247,6 +6303,41 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, return NULL; } +bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + mutex_unlock(&mlxsw_sp->router->lock); + + return rif; +} + +u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + u16 vid = 0; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + goto out; + + /* We only return the VID for VLAN RIFs. Otherwise we return an + * invalid value (0). + */ + if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN) + goto out; + + vid = mlxsw_sp_fid_8021q_vid(rif->fid); + +out: + mutex_unlock(&mlxsw_sp->router->lock); + return vid; +} + static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) { char ritr_pl[MLXSW_REG_RITR_LEN]; @@ -6281,7 +6372,8 @@ mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, case NETDEV_UP: return rif == NULL; case NETDEV_DOWN: - idev = __in_dev_get_rtnl(dev); + rcu_read_lock(); + idev = __in_dev_get_rcu(dev); if (idev && idev->ifa_list) addr_list_empty = false; @@ -6289,6 +6381,7 @@ mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, if (addr_list_empty && inet6_dev && !list_empty(&inet6_dev->addr_list)) addr_list_empty = false; + rcu_read_unlock(); /* macvlans do not have a RIF, but rather piggy back on the * RIF of their lower device. @@ -6411,11 +6504,6 @@ const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) return rif->dev; } -struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif) -{ - return rif->fid; -} - static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, @@ -6528,10 +6616,13 @@ void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_rif *rif; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!rif) - return; + goto out; mlxsw_sp_rif_destroy(rif); +out: + mutex_unlock(&mlxsw_sp->router->lock); } static void @@ -6631,8 +6722,8 @@ err_fid_port_vid_map: return err; } -void -mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +static void +__mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; @@ -6650,6 +6741,16 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) mlxsw_sp_rif_subport_put(rif); } +void +mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp; + + mutex_lock(&mlxsw_sp->router->lock); + __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + mutex_unlock(&mlxsw_sp->router->lock); +} + static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, struct net_device *port_dev, unsigned long event, u16 vid, @@ -6667,7 +6768,7 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev, extack); case NETDEV_DOWN: - mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); break; } @@ -6848,8 +6949,8 @@ err_rif_vrrp_add: return err; } -void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, - const struct net_device *macvlan_dev) +static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, + const struct net_device *macvlan_dev) { struct macvlan_dev *vlan = netdev_priv(macvlan_dev); struct mlxsw_sp_rif *rif; @@ -6866,6 +6967,14 @@ void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_index(rif->fid), false); } +void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, + const struct net_device *macvlan_dev) +{ + mutex_lock(&mlxsw_sp->router->lock); + __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); + mutex_unlock(&mlxsw_sp->router->lock); +} + static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp, struct net_device *macvlan_dev, unsigned long event, @@ -6875,7 +6984,7 @@ static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp, case NETDEV_UP: return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack); case NETDEV_DOWN: - mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); + __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); break; } @@ -6945,15 +7054,17 @@ static int mlxsw_sp_inetaddr_event(struct notifier_block *nb, /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */ if (event == NETDEV_UP) - goto out; + return NOTIFY_DONE; router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb); + mutex_lock(&router->lock); rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL); out: + mutex_unlock(&router->lock); return notifier_from_errno(err); } @@ -6968,8 +7079,9 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) - goto out; + return NOTIFY_DONE; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; @@ -6981,6 +7093,7 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack); out: + mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); } @@ -7001,6 +7114,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) struct mlxsw_sp_rif *rif; rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) @@ -7008,6 +7122,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL); out: + mutex_unlock(&mlxsw_sp->router->lock); rtnl_unlock(); dev_put(dev); kfree(inet6addr_work); @@ -7052,8 +7167,9 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) - goto out; + return NOTIFY_DONE; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; @@ -7065,6 +7181,7 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack); out: + mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); } @@ -7151,24 +7268,30 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, { struct mlxsw_sp *mlxsw_sp; struct mlxsw_sp_rif *rif; + int err = 0; mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) return 0; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!rif) - return 0; + goto out; switch (event) { case NETDEV_CHANGEMTU: /* fall through */ case NETDEV_CHANGEADDR: - return mlxsw_sp_router_port_change_event(mlxsw_sp, rif); + err = mlxsw_sp_router_port_change_event(mlxsw_sp, rif); + break; case NETDEV_PRE_CHANGEADDR: - return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); + err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); + break; } - return 0; +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; } static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, @@ -7211,9 +7334,10 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, if (!mlxsw_sp || netif_is_macvlan(l3_dev)) return 0; + mutex_lock(&mlxsw_sp->router->lock); switch (event) { case NETDEV_PRECHANGEUPPER: - return 0; + break; case NETDEV_CHANGEUPPER: if (info->linking) { struct netlink_ext_ack *extack; @@ -7225,6 +7349,7 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, } break; } + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -7351,13 +7476,14 @@ u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) return mlxsw_core_max_ports(mlxsw_sp->core) + 1; } -static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) +static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) { struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + u16 fid_index = mlxsw_sp_fid_index(rif->fid); int err; - err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true); + err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, + true); if (err) return err; @@ -7386,13 +7512,13 @@ err_fid_bc_flood_set: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), false); err_fid_mc_flood_set: - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); + mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); return err; } -static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) +static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) { - u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + u16 fid_index = mlxsw_sp_fid_index(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_fid *fid = rif->fid; @@ -7404,10 +7530,41 @@ static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), false); - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); + mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); } static struct mlxsw_sp_fid * +mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex); +} + +static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) +{ + struct switchdev_notifier_fdb_info info; + struct net_device *dev; + + dev = br_fdb_find_port(rif->dev, mac, 0); + if (!dev) + return; + + info.addr = mac; + info.vid = 0; + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, + NULL); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { + .type = MLXSW_SP_RIF_TYPE_FID, + .rif_size = sizeof(struct mlxsw_sp_rif), + .configure = mlxsw_sp_rif_fid_configure, + .deconfigure = mlxsw_sp_rif_fid_deconfigure, + .fid_get = mlxsw_sp_rif_fid_fid_get, + .fdb_del = mlxsw_sp_rif_fid_fdb_del, +}; + +static struct mlxsw_sp_fid * mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { @@ -7428,7 +7585,7 @@ mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, } } - return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack); + return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid); } static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) @@ -7449,103 +7606,6 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) NULL); } -static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = { - .type = MLXSW_SP_RIF_TYPE_VLAN, - .rif_size = sizeof(struct mlxsw_sp_rif), - .configure = mlxsw_sp_rif_vlan_configure, - .deconfigure = mlxsw_sp_rif_vlan_deconfigure, - .fid_get = mlxsw_sp_rif_vlan_fid_get, - .fdb_del = mlxsw_sp_rif_vlan_fdb_del, -}; - -static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) -{ - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - u16 fid_index = mlxsw_sp_fid_index(rif->fid); - int err; - - err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, - true); - if (err) - return err; - - err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, - mlxsw_sp_router_port(mlxsw_sp), true); - if (err) - goto err_fid_mc_flood_set; - - err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, - mlxsw_sp_router_port(mlxsw_sp), true); - if (err) - goto err_fid_bc_flood_set; - - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, - mlxsw_sp_fid_index(rif->fid), true); - if (err) - goto err_rif_fdb_op; - - mlxsw_sp_fid_rif_set(rif->fid, rif); - return 0; - -err_rif_fdb_op: - mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, - mlxsw_sp_router_port(mlxsw_sp), false); -err_fid_bc_flood_set: - mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, - mlxsw_sp_router_port(mlxsw_sp), false); -err_fid_mc_flood_set: - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); - return err; -} - -static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) -{ - u16 fid_index = mlxsw_sp_fid_index(rif->fid); - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - struct mlxsw_sp_fid *fid = rif->fid; - - mlxsw_sp_fid_rif_set(fid, NULL); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, - mlxsw_sp_fid_index(fid), false); - mlxsw_sp_rif_macvlan_flush(rif); - mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, - mlxsw_sp_router_port(mlxsw_sp), false); - mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, - mlxsw_sp_router_port(mlxsw_sp), false); - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); -} - -static struct mlxsw_sp_fid * -mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, - struct netlink_ext_ack *extack) -{ - return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack); -} - -static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) -{ - struct switchdev_notifier_fdb_info info; - struct net_device *dev; - - dev = br_fdb_find_port(rif->dev, mac, 0); - if (!dev) - return; - - info.addr = mac; - info.vid = 0; - call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, - NULL); -} - -static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { - .type = MLXSW_SP_RIF_TYPE_FID, - .rif_size = sizeof(struct mlxsw_sp_rif), - .configure = mlxsw_sp_rif_fid_configure, - .deconfigure = mlxsw_sp_rif_fid_deconfigure, - .fid_get = mlxsw_sp_rif_fid_fid_get, - .fdb_del = mlxsw_sp_rif_fid_fdb_del, -}; - static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = { .type = MLXSW_SP_RIF_TYPE_VLAN, .rif_size = sizeof(struct mlxsw_sp_rif), @@ -7733,28 +7793,32 @@ int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, u16 *ul_rif_index) { struct mlxsw_sp_rif *ul_rif; + int err = 0; - ASSERT_RTNL(); - + mutex_lock(&mlxsw_sp->router->lock); ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); - if (IS_ERR(ul_rif)) - return PTR_ERR(ul_rif); + if (IS_ERR(ul_rif)) { + err = PTR_ERR(ul_rif); + goto out; + } *ul_rif_index = ul_rif->rif_index; - - return 0; +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; } void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index) { struct mlxsw_sp_rif *ul_rif; - ASSERT_RTNL(); - + mutex_lock(&mlxsw_sp->router->lock); ul_rif = mlxsw_sp->router->rifs[ul_rif_index]; if (WARN_ON(!ul_rif)) - return; + goto out; mlxsw_sp_ul_rif_put(ul_rif); +out: + mutex_unlock(&mlxsw_sp->router->lock); } static int @@ -8004,6 +8068,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL); if (!router) return -ENOMEM; + mutex_init(&router->lock); mlxsw_sp->router = router; router->mlxsw_sp = mlxsw_sp; @@ -8107,6 +8172,7 @@ err_router_init: err_register_inet6addr_notifier: unregister_inetaddr_notifier(&router->inetaddr_nb); err_register_inetaddr_notifier: + mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); return err; } @@ -8127,5 +8193,6 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) __mlxsw_sp_router_fini(mlxsw_sp); unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); + mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index c9b94f435cdd..8418dc3ae967 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -7,6 +7,49 @@ #include "spectrum.h" #include "reg.h" +struct mlxsw_sp_router_nve_decap { + u32 ul_tb_id; + u32 tunnel_index; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr ul_sip; + u8 valid:1; +}; + +struct mlxsw_sp_router { + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif **rifs; + struct mlxsw_sp_vr *vrs; + struct rhashtable neigh_ht; + struct rhashtable nexthop_group_ht; + struct rhashtable nexthop_ht; + struct list_head nexthop_list; + struct { + /* One tree for each protocol: IPv4 and IPv6 */ + struct mlxsw_sp_lpm_tree *proto_trees[2]; + struct mlxsw_sp_lpm_tree *trees; + unsigned int tree_count; + } lpm; + struct { + struct delayed_work dw; + unsigned long interval; /* ms */ + } neighs_update; + struct delayed_work nexthop_probe_dw; +#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ + struct list_head nexthop_neighs_list; + struct list_head ipip_list; + bool aborted; + struct notifier_block fib_nb; + struct notifier_block netevent_nb; + struct notifier_block inetaddr_nb; + struct notifier_block inet6addr_nb; + const struct mlxsw_sp_rif_ops **rif_ops_arr; + const struct mlxsw_sp_ipip_ops **ipip_ops_arr; + u32 adj_discard_index; + bool adj_discard_index_valid; + struct mlxsw_sp_router_nve_decap nve_decap_config; + struct mutex lock; /* Protects shared router resources */ +}; + struct mlxsw_sp_rif_ipip_lb; struct mlxsw_sp_rif_ipip_lb_config { enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 0cdd7954a085..9fb2e9d93929 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -3,6 +3,8 @@ #include <linux/if_bridge.h> #include <linux/list.h> +#include <linux/rtnetlink.h> +#include <linux/workqueue.h> #include <net/arp.h> #include <net/gre.h> #include <net/lag.h> @@ -14,38 +16,43 @@ #include "spectrum_span.h" #include "spectrum_switchdev.h" +struct mlxsw_sp_span { + struct work_struct work; + struct mlxsw_sp *mlxsw_sp; + atomic_t active_entries_count; + int entries_count; + struct mlxsw_sp_span_entry entries[0]; +}; + +static void mlxsw_sp_span_respin_work(struct work_struct *work); + static u64 mlxsw_sp_span_occ_get(void *priv) { const struct mlxsw_sp *mlxsw_sp = priv; - u64 occ = 0; - int i; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - if (mlxsw_sp->span.entries[i].ref_count) - occ++; - } - - return occ; + return atomic_read(&mlxsw_sp->span->active_entries_count); } int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); - int i; + struct mlxsw_sp_span *span; + int i, entries_count; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN)) return -EIO; - mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, - MAX_SPAN); - mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count, - sizeof(struct mlxsw_sp_span_entry), - GFP_KERNEL); - if (!mlxsw_sp->span.entries) + entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_SPAN); + span = kzalloc(struct_size(span, entries, entries_count), GFP_KERNEL); + if (!span) return -ENOMEM; + span->entries_count = entries_count; + atomic_set(&span->active_entries_count, 0); + span->mlxsw_sp = mlxsw_sp; + mlxsw_sp->span = span; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; INIT_LIST_HEAD(&curr->bound_ports_list); curr->id = i; @@ -53,6 +60,7 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_SPAN, mlxsw_sp_span_occ_get, mlxsw_sp); + INIT_WORK(&span->work, mlxsw_sp_span_respin_work); return 0; } @@ -62,14 +70,15 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int i; + cancel_work_sync(&mlxsw_sp->span->work); devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_SPAN); - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; WARN_ON_ONCE(!list_empty(&curr->bound_ports_list)); } - kfree(mlxsw_sp->span.entries); + kfree(mlxsw_sp->span); } static int @@ -645,15 +654,16 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp, int i; /* find a free entry to use */ - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - if (!mlxsw_sp->span.entries[i].ref_count) { - span_entry = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + if (!mlxsw_sp->span->entries[i].ref_count) { + span_entry = &mlxsw_sp->span->entries[i]; break; } } if (!span_entry) return NULL; + atomic_inc(&mlxsw_sp->span->active_entries_count); span_entry->ops = ops; span_entry->ref_count = 1; span_entry->to_dev = to_dev; @@ -662,9 +672,11 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp, return span_entry; } -static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry) +static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) { mlxsw_sp_span_entry_deconfigure(span_entry); + atomic_dec(&mlxsw_sp->span->active_entries_count); } struct mlxsw_sp_span_entry * @@ -673,8 +685,8 @@ mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, { int i; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; if (curr->ref_count && curr->to_dev == to_dev) return curr; @@ -694,8 +706,8 @@ mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id) { int i; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; if (curr->ref_count && curr->id == span_id) return curr; @@ -726,7 +738,7 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, { WARN_ON(!span_entry->ref_count); if (--span_entry->ref_count == 0) - mlxsw_sp_span_entry_destroy(span_entry); + mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); return 0; } @@ -736,8 +748,8 @@ static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) struct mlxsw_sp_span_inspected_port *p; int i; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; list_for_each_entry(p, &curr->bound_ports_list, list) if (p->local_port == port->local_port && @@ -842,9 +854,9 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, * so if a binding is requested, check for conflicts. */ if (bind) - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { struct mlxsw_sp_span_entry *curr = - &mlxsw_sp->span.entries[i]; + &mlxsw_sp->span->entries[i]; if (mlxsw_sp_span_entry_bound_port_find(curr, type, port, bind)) @@ -988,14 +1000,18 @@ void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id, mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind); } -void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_span_respin_work(struct work_struct *work) { - int i; - int err; + struct mlxsw_sp_span *span; + struct mlxsw_sp *mlxsw_sp; + int i, err; - ASSERT_RTNL(); - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + span = container_of(work, struct mlxsw_sp_span, work); + mlxsw_sp = span->mlxsw_sp; + + rtnl_lock(); + for (i = 0; i < mlxsw_sp->span->entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i]; struct mlxsw_sp_span_parms sparms = {NULL}; if (!curr->ref_count) @@ -1010,4 +1026,12 @@ void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms); } } + rtnl_unlock(); +} + +void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp) +{ + if (atomic_read(&mlxsw_sp->span->active_entries_count) == 0) + return; + mlxsw_core_schedule_work(&mlxsw_sp->span->work); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index a3af171c6358..a26162b08b7d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -153,16 +153,64 @@ static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp); } +static int mlxsw_sp_bridge_device_vxlan_init(struct mlxsw_sp_bridge *bridge, + struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *dev, *stop_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev) && netif_running(dev)) { + err = mlxsw_sp_bridge_vxlan_join(bridge->mlxsw_sp, + br_dev, dev, 0, + extack); + if (err) { + stop_dev = dev; + goto err_vxlan_join; + } + } + } + + return 0; + +err_vxlan_join: + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev) && netif_running(dev)) { + if (stop_dev == dev) + break; + mlxsw_sp_bridge_vxlan_leave(bridge->mlxsw_sp, dev); + } + } + return err; +} + +static void mlxsw_sp_bridge_device_vxlan_fini(struct mlxsw_sp_bridge *bridge, + struct net_device *br_dev) +{ + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev) && netif_running(dev)) + mlxsw_sp_bridge_vxlan_leave(bridge->mlxsw_sp, dev); + } +} + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, - struct net_device *br_dev) + struct net_device *br_dev, + struct netlink_ext_ack *extack) { struct device *dev = bridge->mlxsw_sp->bus_info->dev; struct mlxsw_sp_bridge_device *bridge_device; bool vlan_enabled = br_vlan_enabled(br_dev); + int err; if (vlan_enabled && bridge->vlan_enabled_exists) { dev_err(dev, "Only one VLAN-aware bridge is supported\n"); + NL_SET_ERR_MSG_MOD(extack, "Only one VLAN-aware bridge is supported"); return ERR_PTR(-EINVAL); } @@ -184,13 +232,29 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, INIT_LIST_HEAD(&bridge_device->mids_list); list_add(&bridge_device->list, &bridge->bridges_list); + /* It is possible we already have VXLAN devices enslaved to the bridge. + * In which case, we need to replay their configuration as if they were + * just now enslaved to the bridge. + */ + err = mlxsw_sp_bridge_device_vxlan_init(bridge, br_dev, extack); + if (err) + goto err_vxlan_init; + return bridge_device; + +err_vxlan_init: + list_del(&bridge_device->list); + if (bridge_device->vlan_enabled) + bridge->vlan_enabled_exists = false; + kfree(bridge_device); + return ERR_PTR(err); } static void mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, struct mlxsw_sp_bridge_device *bridge_device) { + mlxsw_sp_bridge_device_vxlan_fini(bridge, bridge_device->dev); mlxsw_sp_bridge_device_rifs_destroy(bridge->mlxsw_sp, bridge_device->dev); list_del(&bridge_device->list); @@ -203,7 +267,8 @@ mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_get(struct mlxsw_sp_bridge *bridge, - struct net_device *br_dev) + struct net_device *br_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_bridge_device *bridge_device; @@ -211,7 +276,7 @@ mlxsw_sp_bridge_device_get(struct mlxsw_sp_bridge *bridge, if (bridge_device) return bridge_device; - return mlxsw_sp_bridge_device_create(bridge, br_dev); + return mlxsw_sp_bridge_device_create(bridge, br_dev, extack); } static void @@ -292,7 +357,8 @@ mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port) static struct mlxsw_sp_bridge_port * mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge, - struct net_device *brport_dev) + struct net_device *brport_dev, + struct netlink_ext_ack *extack) { struct net_device *br_dev = netdev_master_upper_dev_get(brport_dev); struct mlxsw_sp_bridge_device *bridge_device; @@ -305,7 +371,7 @@ mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge, return bridge_port; } - bridge_device = mlxsw_sp_bridge_device_get(bridge, br_dev); + bridge_device = mlxsw_sp_bridge_device_get(bridge, br_dev, extack); if (IS_ERR(bridge_device)) return ERR_CAST(bridge_device); @@ -1000,7 +1066,7 @@ mlxsw_sp_port_vlan_bridge_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list); mlxsw_sp_bridge_port_get(mlxsw_sp_port->mlxsw_sp->bridge, - bridge_port->dev); + bridge_port->dev, extack); mlxsw_sp_port_vlan->bridge_port = bridge_port; return 0; @@ -1107,16 +1173,12 @@ mlxsw_sp_br_ban_rif_pvid_change(struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev, const struct switchdev_obj_port_vlan *vlan) { - struct mlxsw_sp_rif *rif; - struct mlxsw_sp_fid *fid; u16 pvid; u16 vid; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev); - if (!rif) + pvid = mlxsw_sp_rif_vid(mlxsw_sp, br_dev); + if (!pvid) return 0; - fid = mlxsw_sp_rif_fid(rif); - pvid = mlxsw_sp_fid_8021q_vid(fid); for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { if (vlan->flags & BRIDGE_VLAN_INFO_PVID) { @@ -1712,36 +1774,6 @@ mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, } } -struct mlxsw_sp_span_respin_work { - struct work_struct work; - struct mlxsw_sp *mlxsw_sp; -}; - -static void mlxsw_sp_span_respin_work(struct work_struct *work) -{ - struct mlxsw_sp_span_respin_work *respin_work = - container_of(work, struct mlxsw_sp_span_respin_work, work); - - rtnl_lock(); - mlxsw_sp_span_respin(respin_work->mlxsw_sp); - rtnl_unlock(); - kfree(respin_work); -} - -static void mlxsw_sp_span_respin_schedule(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_span_respin_work *respin_work; - - respin_work = kzalloc(sizeof(*respin_work), GFP_ATOMIC); - if (!respin_work) - return; - - INIT_WORK(&respin_work->work, mlxsw_sp_span_respin_work); - respin_work->mlxsw_sp = mlxsw_sp; - - mlxsw_core_schedule_work(&respin_work->work); -} - static int mlxsw_sp_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans, @@ -1763,7 +1795,7 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, * call for later, so that the respin logic sees the * updated bridge state. */ - mlxsw_sp_span_respin_schedule(mlxsw_sp_port->mlxsw_sp); + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); } break; case SWITCHDEV_OBJ_ID_PORT_MDB: @@ -1916,7 +1948,7 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, break; } - mlxsw_sp_span_respin_schedule(mlxsw_sp_port->mlxsw_sp); + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); return err; } @@ -1990,12 +2022,11 @@ mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, return err; } - /* If no other port is member in the VLAN, then the FID does not exist. - * NVE will be enabled on the FID once a port joins the VLAN - */ - fid = mlxsw_sp_fid_8021q_lookup(mlxsw_sp, vid); - if (!fid) - return 0; + fid = mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); + if (IS_ERR(fid)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to create 802.1Q FID"); + return PTR_ERR(fid); + } if (mlxsw_sp_fid_vni_is_set(fid)) { NL_SET_ERR_MSG_MOD(extack, "VNI is already set on FID"); @@ -2007,11 +2038,6 @@ mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, if (err) goto err_nve_fid_enable; - /* The tunnel port does not hold a reference on the FID. Only - * local ports and the router port - */ - mlxsw_sp_fid_put(fid); - return 0; err_nve_fid_enable: @@ -2048,38 +2074,8 @@ mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); - struct net_device *vxlan_dev; - struct mlxsw_sp_fid *fid; - int err; - - fid = mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); - if (IS_ERR(fid)) - return fid; - - if (mlxsw_sp_fid_vni_is_set(fid)) - return fid; - - /* Find the VxLAN device that has the specified VLAN configured as - * PVID and egress untagged. There can be at most one such device - */ - vxlan_dev = mlxsw_sp_bridge_8021q_vxlan_dev_find(bridge_device->dev, - vid); - if (!vxlan_dev) - return fid; - - if (!netif_running(vxlan_dev)) - return fid; - - err = mlxsw_sp_bridge_8021q_vxlan_join(bridge_device, vxlan_dev, vid, - extack); - if (err) - goto err_vxlan_join; - - return fid; -err_vxlan_join: - mlxsw_sp_fid_put(fid); - return ERR_PTR(err); + return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); } static struct mlxsw_sp_fid * @@ -2184,9 +2180,9 @@ mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_fid *fid; int err; - fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); - if (!fid) { - NL_SET_ERR_MSG_MOD(extack, "Did not find a corresponding FID"); + fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + if (IS_ERR(fid)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to create 802.1D FID"); return -EINVAL; } @@ -2200,11 +2196,6 @@ mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, if (err) goto err_nve_fid_enable; - /* The tunnel port does not hold a reference on the FID. Only - * local ports and the router port - */ - mlxsw_sp_fid_put(fid); - return 0; err_nve_fid_enable: @@ -2218,34 +2209,8 @@ mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); - struct net_device *vxlan_dev; - struct mlxsw_sp_fid *fid; - int err; - fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); - if (IS_ERR(fid)) - return fid; - - if (mlxsw_sp_fid_vni_is_set(fid)) - return fid; - - vxlan_dev = mlxsw_sp_bridge_vxlan_dev_find(bridge_device->dev); - if (!vxlan_dev) - return fid; - - if (!netif_running(vxlan_dev)) - return fid; - - err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, 0, - extack); - if (err) - goto err_vxlan_join; - - return fid; - -err_vxlan_join: - mlxsw_sp_fid_put(fid); - return ERR_PTR(err); + return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); } static struct mlxsw_sp_fid * @@ -2287,7 +2252,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port; int err; - bridge_port = mlxsw_sp_bridge_port_get(mlxsw_sp->bridge, brport_dev); + bridge_port = mlxsw_sp_bridge_port_get(mlxsw_sp->bridge, brport_dev, + extack); if (IS_ERR(bridge_port)) return PTR_ERR(bridge_port); bridge_device = bridge_port->bridge_device; @@ -2351,21 +2317,11 @@ void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, return; mlxsw_sp_nve_fid_disable(mlxsw_sp, fid); + /* Drop both the reference we just took during lookup and the reference + * the VXLAN device took. + */ + mlxsw_sp_fid_put(fid); mlxsw_sp_fid_put(fid); -} - -struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev, - u16 vid, - struct netlink_ext_ack *extack) -{ - struct mlxsw_sp_bridge_device *bridge_device; - - bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); - if (WARN_ON(!bridge_device)) - return ERR_PTR(-EINVAL); - - return bridge_device->ops->fid_get(bridge_device, vid, extack); } static void @@ -2718,19 +2674,24 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp, + bool no_delay) { struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge; + unsigned int interval = no_delay ? 0 : bridge->fdb_notify.interval; mlxsw_core_schedule_dw(&bridge->fdb_notify.dw, - msecs_to_jiffies(bridge->fdb_notify.interval)); + msecs_to_jiffies(interval)); } +#define MLXSW_SP_FDB_SFN_QUERIES_PER_SESSION 10 + static void mlxsw_sp_fdb_notify_work(struct work_struct *work) { struct mlxsw_sp_bridge *bridge; struct mlxsw_sp *mlxsw_sp; char *sfn_pl; + int queries; u8 num_rec; int i; int err; @@ -2743,20 +2704,26 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) mlxsw_sp = bridge->mlxsw_sp; rtnl_lock(); - mlxsw_reg_sfn_pack(sfn_pl); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); - if (err) { - dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n"); - goto out; + queries = MLXSW_SP_FDB_SFN_QUERIES_PER_SESSION; + while (queries > 0) { + mlxsw_reg_sfn_pack(sfn_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n"); + goto out; + } + num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); + if (num_rec != MLXSW_REG_SFN_REC_MAX_COUNT) + goto out; + queries--; } - num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl); - for (i = 0; i < num_rec; i++) - mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); out: rtnl_unlock(); kfree(sfn_pl); - mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); + mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp, !queries); } struct mlxsw_sp_switchdev_event_work { @@ -3502,7 +3469,7 @@ static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp) INIT_DELAYED_WORK(&bridge->fdb_notify.dw, mlxsw_sp_fdb_notify_work); bridge->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL; - mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); + mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp, false); return 0; err_register_switchdev_blocking_notifier: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 60205aa3f6a5..9096ffd89e50 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -1,13 +1,16 @@ // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 /* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ +#include <linux/bitops.h> #include <linux/kernel.h> +#include <linux/netlink.h> #include <net/devlink.h> #include <uapi/linux/devlink.h> #include "core.h" #include "reg.h" #include "spectrum.h" +#include "spectrum_trap.h" /* All driver-specific traps must be documented in * Documentation/networking/devlink/mlxsw.rst @@ -25,36 +28,166 @@ enum { #define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT +static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port_pcpu_stats *pcpu_stats; + + if (unlikely(!mlxsw_sp_port)) { + dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n", + local_port); + kfree_skb(skb); + return -EINVAL; + } + + skb->dev = mlxsw_sp_port->dev; + + pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += skb->len; + u64_stats_update_end(&pcpu_stats->syncp); + + skb->protocol = eth_type_trans(skb, skb->dev); + + return 0; +} + static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, - void *priv); + void *trap_ctx) +{ + struct devlink_port *in_devlink_port; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + int err; + + mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + err = mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port); + if (err) + return; + + devlink = priv_to_devlink(mlxsw_sp->core); + in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + local_port); + skb_push(skb, ETH_HLEN); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, NULL); + consume_skb(skb); +} + +static void mlxsw_sp_rx_acl_drop_listener(struct sk_buff *skb, u8 local_port, + void *trap_ctx) +{ + u32 cookie_index = mlxsw_skb_cb(skb)->cookie_index; + const struct flow_action_cookie *fa_cookie; + struct devlink_port *in_devlink_port; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + int err; + + mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + err = mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port); + if (err) + return; + + devlink = priv_to_devlink(mlxsw_sp->core); + in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + local_port); + skb_push(skb, ETH_HLEN); + rcu_read_lock(); + fa_cookie = mlxsw_sp_acl_act_cookie_lookup(mlxsw_sp, cookie_index); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, fa_cookie); + rcu_read_unlock(); + consume_skb(skb); +} + static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, - void *trap_ctx); + void *trap_ctx) +{ + struct devlink_port *in_devlink_port; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + int err; + + mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + err = mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port); + if (err) + return; + + devlink = priv_to_devlink(mlxsw_sp->core); + in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + local_port); + skb_push(skb, ETH_HLEN); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, NULL); + skb_pull(skb, ETH_HLEN); + skb->offload_fwd_mark = 1; + netif_receive_skb(skb); +} #define MLXSW_SP_TRAP_DROP(_id, _group_id) \ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ MLXSW_SP_TRAP_METADATA) +#define MLXSW_SP_TRAP_DROP_EXT(_id, _group_id, _metadata) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + MLXSW_SP_TRAP_METADATA | (_metadata)) + #define MLXSW_SP_TRAP_DRIVER_DROP(_id, _group_id) \ DEVLINK_TRAP_DRIVER(DROP, DROP, DEVLINK_MLXSW_TRAP_ID_##_id, \ DEVLINK_MLXSW_TRAP_NAME_##_id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ MLXSW_SP_TRAP_METADATA) #define MLXSW_SP_TRAP_EXCEPTION(_id, _group_id) \ DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ MLXSW_SP_TRAP_METADATA) #define MLXSW_SP_RXL_DISCARD(_id, _group_id) \ - MLXSW_RXL(mlxsw_sp_rx_drop_listener, DISCARD_##_id, SET_FW_DEFAULT, \ - false, SP_##_group_id, DISCARD) + MLXSW_RXL_DIS(mlxsw_sp_rx_drop_listener, DISCARD_##_id, \ + TRAP_EXCEPTION_TO_CPU, false, SP_##_group_id, \ + SET_FW_DEFAULT, SP_##_group_id) + +#define MLXSW_SP_RXL_ACL_DISCARD(_id, _en_group_id, _dis_group_id) \ + MLXSW_RXL_DIS(mlxsw_sp_rx_acl_drop_listener, DISCARD_##_id, \ + TRAP_EXCEPTION_TO_CPU, false, SP_##_en_group_id, \ + SET_FW_DEFAULT, SP_##_dis_group_id) #define MLXSW_SP_RXL_EXCEPTION(_id, _group_id, _action) \ MLXSW_RXL(mlxsw_sp_rx_exception_listener, _id, \ - _action, false, SP_##_group_id, DISCARD) + _action, false, SP_##_group_id, SET_FW_DEFAULT) + +#define MLXSW_SP_TRAP_POLICER(_id, _rate, _burst) \ + DEVLINK_TRAP_POLICER(_id, _rate, _burst, \ + MLXSW_REG_QPCR_HIGHEST_CIR, \ + MLXSW_REG_QPCR_LOWEST_CIR, \ + 1 << MLXSW_REG_QPCR_HIGHEST_CBS, \ + 1 << MLXSW_REG_QPCR_LOWEST_CBS) + +/* Ordered by policer identifier */ +static const struct devlink_trap_policer mlxsw_sp_trap_policers_arr[] = { + MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 128), +}; -static struct devlink_trap mlxsw_sp_traps_arr[] = { +static const struct devlink_trap_group mlxsw_sp_trap_groups_arr[] = { + DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 1), + DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1), + DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS, 1), + DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 1), +}; + +static const struct devlink_trap mlxsw_sp_traps_arr[] = { MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS), MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), MLXSW_SP_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), @@ -83,9 +216,13 @@ static struct devlink_trap mlxsw_sp_traps_arr[] = { MLXSW_SP_TRAP_DROP(NON_ROUTABLE, L3_DROPS), MLXSW_SP_TRAP_EXCEPTION(DECAP_ERROR, TUNNEL_DROPS), MLXSW_SP_TRAP_DROP(OVERLAY_SMAC_MC, TUNNEL_DROPS), + MLXSW_SP_TRAP_DROP_EXT(INGRESS_FLOW_ACTION_DROP, ACL_DROPS, + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), + MLXSW_SP_TRAP_DROP_EXT(EGRESS_FLOW_ACTION_DROP, ACL_DROPS, + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), }; -static struct mlxsw_listener mlxsw_sp_listeners_arr[] = { +static const struct mlxsw_listener mlxsw_sp_listeners_arr[] = { MLXSW_SP_RXL_DISCARD(ING_PACKET_SMAC_MC, L2_DISCARDS), MLXSW_SP_RXL_DISCARD(ING_SWITCH_VTAG_ALLOW, L2_DISCARDS), MLXSW_SP_RXL_DISCARD(ING_SWITCH_VLAN, L2_DISCARDS), @@ -103,34 +240,37 @@ static struct mlxsw_listener mlxsw_sp_listeners_arr[] = { MLXSW_SP_RXL_DISCARD(ING_ROUTER_IPV4_SIP_BC, L3_DISCARDS), MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_RESERVED_SCOPE, L3_DISCARDS), MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, L3_DISCARDS), - MLXSW_SP_RXL_EXCEPTION(MTUERROR, ROUTER_EXP, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(TTLERROR, ROUTER_EXP, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(RPF, RPF, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, REMOTE_ROUTE, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, HOST_MISS, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, HOST_MISS, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, REMOTE_ROUTE, + MLXSW_SP_RXL_EXCEPTION(MTUERROR, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(TTLERROR, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(RPF, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_DISCARDS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_DISCARDS, TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, ROUTER_EXP, + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, L3_DISCARDS, TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, ROUTER_EXP, + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, L3_DISCARDS, TRAP_EXCEPTION_TO_CPU), MLXSW_SP_RXL_DISCARD(ROUTER_IRIF_EN, L3_DISCARDS), MLXSW_SP_RXL_DISCARD(ROUTER_ERIF_EN, L3_DISCARDS), MLXSW_SP_RXL_DISCARD(NON_ROUTABLE, L3_DISCARDS), - MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, ROUTER_EXP, TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, ROUTER_EXP, + MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, TUNNEL_DISCARDS, TRAP_EXCEPTION_TO_CPU), MLXSW_SP_RXL_EXCEPTION(DISCARD_DEC_PKT, TUNNEL_DISCARDS, TRAP_EXCEPTION_TO_CPU), MLXSW_SP_RXL_DISCARD(OVERLAY_SMAC_MC, TUNNEL_DISCARDS), + MLXSW_SP_RXL_ACL_DISCARD(INGRESS_ACL, ACL_DISCARDS, DUMMY), + MLXSW_SP_RXL_ACL_DISCARD(EGRESS_ACL, ACL_DISCARDS, DUMMY), }; /* Mapping between hardware trap and devlink trap. Multiple hardware traps can * be mapped to the same devlink trap. Order is according to * 'mlxsw_sp_listeners_arr'. */ -static u16 mlxsw_sp_listener_devlink_map[] = { +static const u16 mlxsw_sp_listener_devlink_map[] = { DEVLINK_TRAP_GENERIC_ID_SMAC_MC, DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH, DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER, @@ -164,99 +304,168 @@ static u16 mlxsw_sp_listener_devlink_map[] = { DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC, + DEVLINK_TRAP_GENERIC_ID_INGRESS_FLOW_ACTION_DROP, + DEVLINK_TRAP_GENERIC_ID_EGRESS_FLOW_ACTION_DROP, }; -static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, - u8 local_port, - struct mlxsw_sp_port *mlxsw_sp_port) +#define MLXSW_SP_THIN_POLICER_ID (MLXSW_REG_HTGT_TRAP_GROUP_MAX + 1) + +static struct mlxsw_sp_trap_policer_item * +mlxsw_sp_trap_policer_item_lookup(struct mlxsw_sp *mlxsw_sp, u32 id) { - struct mlxsw_sp_port_pcpu_stats *pcpu_stats; + struct mlxsw_sp_trap_policer_item *policer_item; + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; - if (unlikely(!mlxsw_sp_port)) { - dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n", - local_port); - kfree_skb(skb); - return -EINVAL; + list_for_each_entry(policer_item, &trap->policer_item_list, list) { + if (policer_item->id == id) + return policer_item; } - skb->dev = mlxsw_sp_port->dev; + return NULL; +} - pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); - u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->rx_packets++; - pcpu_stats->rx_bytes += skb->len; - u64_stats_update_end(&pcpu_stats->syncp); +static int mlxsw_sp_trap_cpu_policers_set(struct mlxsw_sp *mlxsw_sp) +{ + char qpcr_pl[MLXSW_REG_QPCR_LEN]; - skb->protocol = eth_type_trans(skb, skb->dev); + /* The purpose of "thin" policer is to drop as many packets + * as possible. The dummy group is using it. + */ + __set_bit(MLXSW_SP_THIN_POLICER_ID, mlxsw_sp->trap->policers_usage); + mlxsw_reg_qpcr_pack(qpcr_pl, MLXSW_SP_THIN_POLICER_ID, + MLXSW_REG_QPCR_IR_UNITS_M, false, 1, 4); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); +} - return 0; +static int mlxsw_sp_trap_dummy_group_init(struct mlxsw_sp *mlxsw_sp) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SP_DUMMY, + MLXSW_SP_THIN_POLICER_ID, 0, 1); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl); } -static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, - void *trap_ctx) +static int mlxsw_sp_trap_policers_init(struct mlxsw_sp *mlxsw_sp) { - struct devlink_port *in_devlink_port; - struct mlxsw_sp_port *mlxsw_sp_port; - struct mlxsw_sp *mlxsw_sp; - struct devlink *devlink; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + u64 free_policers = 0; + u32 last_id = 0; + int err, i; - mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); - mlxsw_sp_port = mlxsw_sp->ports[local_port]; + for_each_clear_bit(i, trap->policers_usage, trap->max_policers) + free_policers++; - if (mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port)) - return; + if (ARRAY_SIZE(mlxsw_sp_trap_policers_arr) > free_policers) { + dev_err(mlxsw_sp->bus_info->dev, "Exceeded number of supported packet trap policers\n"); + return -ENOBUFS; + } - devlink = priv_to_devlink(mlxsw_sp->core); - in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, - local_port); - skb_push(skb, ETH_HLEN); - devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port); - consume_skb(skb); -} + trap->policers_arr = kcalloc(free_policers, + sizeof(struct devlink_trap_policer), + GFP_KERNEL); + if (!trap->policers_arr) + return -ENOMEM; + + trap->policers_count = free_policers; + + for (i = 0; i < free_policers; i++) { + const struct devlink_trap_policer *policer; + + if (i < ARRAY_SIZE(mlxsw_sp_trap_policers_arr)) { + policer = &mlxsw_sp_trap_policers_arr[i]; + trap->policers_arr[i] = *policer; + last_id = policer->id; + } else { + /* Use parameters set for first policer and override + * relevant ones. + */ + policer = &mlxsw_sp_trap_policers_arr[0]; + trap->policers_arr[i] = *policer; + trap->policers_arr[i].id = ++last_id; + trap->policers_arr[i].init_rate = 1; + trap->policers_arr[i].init_burst = 16; + } + } -static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, - void *trap_ctx) -{ - struct devlink_port *in_devlink_port; - struct mlxsw_sp_port *mlxsw_sp_port; - struct mlxsw_sp *mlxsw_sp; - struct devlink *devlink; + INIT_LIST_HEAD(&trap->policer_item_list); - mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); - mlxsw_sp_port = mlxsw_sp->ports[local_port]; + err = devlink_trap_policers_register(devlink, trap->policers_arr, + trap->policers_count); + if (err) + goto err_trap_policers_register; - if (mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port)) - return; + return 0; - devlink = priv_to_devlink(mlxsw_sp->core); - in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, - local_port); - skb_push(skb, ETH_HLEN); - devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port); - skb_pull(skb, ETH_HLEN); - skb->offload_fwd_mark = 1; - netif_receive_skb(skb); +err_trap_policers_register: + kfree(trap->policers_arr); + return err; +} + +static void mlxsw_sp_trap_policers_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + + devlink_trap_policers_unregister(devlink, trap->policers_arr, + trap->policers_count); + WARN_ON(!list_empty(&trap->policer_item_list)); + kfree(trap->policers_arr); } int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp) { + size_t groups_count = ARRAY_SIZE(mlxsw_sp_trap_groups_arr); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + int err; + + err = mlxsw_sp_trap_cpu_policers_set(mlxsw_sp); + if (err) + return err; + + err = mlxsw_sp_trap_dummy_group_init(mlxsw_sp); + if (err) + return err; if (WARN_ON(ARRAY_SIZE(mlxsw_sp_listener_devlink_map) != ARRAY_SIZE(mlxsw_sp_listeners_arr))) return -EINVAL; - return devlink_traps_register(devlink, mlxsw_sp_traps_arr, - ARRAY_SIZE(mlxsw_sp_traps_arr), - mlxsw_sp); + err = mlxsw_sp_trap_policers_init(mlxsw_sp); + if (err) + return err; + + err = devlink_trap_groups_register(devlink, mlxsw_sp_trap_groups_arr, + groups_count); + if (err) + goto err_trap_groups_register; + + err = devlink_traps_register(devlink, mlxsw_sp_traps_arr, + ARRAY_SIZE(mlxsw_sp_traps_arr), mlxsw_sp); + if (err) + goto err_traps_register; + + return 0; + +err_traps_register: + devlink_trap_groups_unregister(devlink, mlxsw_sp_trap_groups_arr, + groups_count); +err_trap_groups_register: + mlxsw_sp_trap_policers_fini(mlxsw_sp); + return err; } void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp) { + size_t groups_count = ARRAY_SIZE(mlxsw_sp_trap_groups_arr); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); devlink_traps_unregister(devlink, mlxsw_sp_traps_arr, ARRAY_SIZE(mlxsw_sp_traps_arr)); + devlink_trap_groups_unregister(devlink, mlxsw_sp_trap_groups_arr, + groups_count); + mlxsw_sp_trap_policers_fini(mlxsw_sp); } int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, @@ -265,7 +474,7 @@ int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, int i; for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { - struct mlxsw_listener *listener; + const struct mlxsw_listener *listener; int err; if (mlxsw_sp_listener_devlink_map[i] != trap->id) @@ -286,7 +495,7 @@ void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core, int i; for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { - struct mlxsw_listener *listener; + const struct mlxsw_listener *listener; if (mlxsw_sp_listener_devlink_map[i] != trap->id) continue; @@ -303,27 +512,24 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, int i; for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { - enum mlxsw_reg_hpkt_action hw_action; - struct mlxsw_listener *listener; + const struct mlxsw_listener *listener; + bool enabled; int err; if (mlxsw_sp_listener_devlink_map[i] != trap->id) continue; listener = &mlxsw_sp_listeners_arr[i]; - switch (action) { case DEVLINK_TRAP_ACTION_DROP: - hw_action = MLXSW_REG_HPKT_ACTION_SET_FW_DEFAULT; + enabled = false; break; case DEVLINK_TRAP_ACTION_TRAP: - hw_action = MLXSW_REG_HPKT_ACTION_TRAP_EXCEPTION_TO_CPU; + enabled = true; break; default: return -EINVAL; } - - err = mlxsw_core_trap_action_set(mlxsw_core, listener, - hw_action); + err = mlxsw_core_trap_state_set(mlxsw_core, listener, enabled); if (err) return err; } @@ -331,62 +537,34 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, return 0; } -#define MLXSW_SP_DISCARD_POLICER_ID (MLXSW_REG_HTGT_TRAP_GROUP_MAX + 1) - -static int -mlxsw_sp_trap_group_policer_init(struct mlxsw_sp *mlxsw_sp, - const struct devlink_trap_group *group) -{ - enum mlxsw_reg_qpcr_ir_units ir_units; - char qpcr_pl[MLXSW_REG_QPCR_LEN]; - u16 policer_id; - u8 burst_size; - bool is_bytes; - u32 rate; - - switch (group->id) { - case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: /* fall through */ - case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS: /* fall through */ - case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS: - policer_id = MLXSW_SP_DISCARD_POLICER_ID; - ir_units = MLXSW_REG_QPCR_IR_UNITS_M; - is_bytes = false; - rate = 10 * 1024; /* 10Kpps */ - burst_size = 7; - break; - default: - return -EINVAL; - } - - mlxsw_reg_qpcr_pack(qpcr_pl, policer_id, ir_units, is_bytes, rate, - burst_size); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); -} - static int -__mlxsw_sp_trap_group_init(struct mlxsw_sp *mlxsw_sp, - const struct devlink_trap_group *group) +__mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + u32 policer_id) { + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u16 hw_policer_id = MLXSW_REG_HTGT_INVALID_POLICER; char htgt_pl[MLXSW_REG_HTGT_LEN]; u8 priority, tc, group_id; - u16 policer_id; switch (group->id) { case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS; - policer_id = MLXSW_SP_DISCARD_POLICER_ID; priority = 0; tc = 1; break; case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS: group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS; - policer_id = MLXSW_SP_DISCARD_POLICER_ID; priority = 0; tc = 1; break; case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS: group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS; - policer_id = MLXSW_SP_DISCARD_POLICER_ID; + priority = 0; + tc = 1; + break; + case DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_DROPS: + group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS; priority = 0; tc = 1; break; @@ -394,23 +572,179 @@ __mlxsw_sp_trap_group_init(struct mlxsw_sp *mlxsw_sp, return -EINVAL; } - mlxsw_reg_htgt_pack(htgt_pl, group_id, policer_id, priority, tc); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl); + if (policer_id) { + struct mlxsw_sp_trap_policer_item *policer_item; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, + policer_id); + if (WARN_ON(!policer_item)) + return -EINVAL; + hw_policer_id = policer_item->hw_id; + } + + mlxsw_reg_htgt_pack(htgt_pl, group_id, hw_policer_id, priority, tc); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, const struct devlink_trap_group *group) { - struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + return __mlxsw_sp_trap_group_init(mlxsw_core, group, + group->init_policer_id); +} + +int mlxsw_sp_trap_group_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group, + const struct devlink_trap_policer *policer) +{ + u32 policer_id = policer ? policer->id : 0; + + return __mlxsw_sp_trap_group_init(mlxsw_core, group, policer_id); +} + +static struct mlxsw_sp_trap_policer_item * +mlxsw_sp_trap_policer_item_init(struct mlxsw_sp *mlxsw_sp, u32 id) +{ + struct mlxsw_sp_trap_policer_item *policer_item; + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + u16 hw_id; + + /* We should be able to allocate a policer because the number of + * policers we registered with devlink is in according with the number + * of available policers. + */ + hw_id = find_first_zero_bit(trap->policers_usage, trap->max_policers); + if (WARN_ON(hw_id == trap->max_policers)) + return ERR_PTR(-ENOBUFS); + + policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL); + if (!policer_item) + return ERR_PTR(-ENOMEM); + + __set_bit(hw_id, trap->policers_usage); + policer_item->hw_id = hw_id; + policer_item->id = id; + list_add_tail(&policer_item->list, &trap->policer_item_list); + + return policer_item; +} + +static void +mlxsw_sp_trap_policer_item_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_trap_policer_item *policer_item) +{ + list_del(&policer_item->list); + __clear_bit(policer_item->hw_id, mlxsw_sp->trap->policers_usage); + kfree(policer_item); +} + +static int mlxsw_sp_trap_policer_bs(u64 burst, u8 *p_burst_size, + struct netlink_ext_ack *extack) +{ + int bs = fls64(burst) - 1; + + if (burst != (1 << bs)) { + NL_SET_ERR_MSG_MOD(extack, "Policer burst size is not power of two"); + return -EINVAL; + } + + *p_burst_size = bs; + + return 0; +} + +static int __mlxsw_sp_trap_policer_set(struct mlxsw_sp *mlxsw_sp, u16 hw_id, + u64 rate, u64 burst, bool clear_counter, + struct netlink_ext_ack *extack) +{ + char qpcr_pl[MLXSW_REG_QPCR_LEN]; + u8 burst_size; int err; - err = mlxsw_sp_trap_group_policer_init(mlxsw_sp, group); + err = mlxsw_sp_trap_policer_bs(burst, &burst_size, extack); if (err) return err; - err = __mlxsw_sp_trap_group_init(mlxsw_sp, group); + mlxsw_reg_qpcr_pack(qpcr_pl, hw_id, MLXSW_REG_QPCR_IR_UNITS_M, false, + rate, burst_size); + mlxsw_reg_qpcr_clear_counter_set(qpcr_pl, clear_counter); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); +} + +int mlxsw_sp_trap_policer_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + int err; + + policer_item = mlxsw_sp_trap_policer_item_init(mlxsw_sp, policer->id); + if (IS_ERR(policer_item)) + return PTR_ERR(policer_item); + + err = __mlxsw_sp_trap_policer_set(mlxsw_sp, policer_item->hw_id, + policer->init_rate, + policer->init_burst, true, NULL); + if (err) + goto err_trap_policer_set; + + return 0; + +err_trap_policer_set: + mlxsw_sp_trap_policer_item_fini(mlxsw_sp, policer_item); + return err; +} + +void mlxsw_sp_trap_policer_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return; + + mlxsw_sp_trap_policer_item_fini(mlxsw_sp, policer_item); +} + +int mlxsw_sp_trap_policer_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 rate, u64 burst, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return -EINVAL; + + return __mlxsw_sp_trap_policer_set(mlxsw_sp, policer_item->hw_id, + rate, burst, false, extack); +} + +int +mlxsw_sp_trap_policer_counter_get(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_policer *policer, + u64 *p_drops) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_trap_policer_item *policer_item; + char qpcr_pl[MLXSW_REG_QPCR_LEN]; + int err; + + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return -EINVAL; + + mlxsw_reg_qpcr_pack(qpcr_pl, policer_item->hw_id, + MLXSW_REG_QPCR_IR_UNITS_M, false, 0, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); if (err) return err; + *p_drops = mlxsw_reg_qpcr_violate_count_get(qpcr_pl); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h new file mode 100644 index 000000000000..8c54897ba173 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2020 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_TRAP_H +#define _MLXSW_SPECTRUM_TRAP_H + +#include <linux/list.h> +#include <net/devlink.h> + +struct mlxsw_sp_trap { + struct devlink_trap_policer *policers_arr; /* Registered policers */ + u64 policers_count; /* Number of registered policers */ + struct list_head policer_item_list; + u64 max_policers; + unsigned long policers_usage[]; /* Usage bitmap */ +}; + +struct mlxsw_sp_trap_policer_item { + u16 hw_id; + u32 id; + struct list_head list; /* Member of policer_item_list */ +}; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index f0e98ec8f1ee..90535820b559 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -180,7 +180,7 @@ static int mlxsw_sx_port_oper_status_get(struct mlxsw_sx_port *mlxsw_sx_port, if (err) return err; oper_status = mlxsw_reg_paos_oper_status_get(paos_pl); - *p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP ? true : false; + *p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 12e1fa998d42..eaa521b7561b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -102,6 +102,8 @@ enum { MLXSW_TRAP_ID_ACL1 = 0x1C1, /* Multicast trap used for routes with trap-and-forward action */ MLXSW_TRAP_ID_ACL2 = 0x1C2, + MLXSW_TRAP_ID_DISCARD_INGRESS_ACL = 0x1C3, + MLXSW_TRAP_ID_DISCARD_EGRESS_ACL = 0x1C4, MLXSW_TRAP_ID_MAX = 0x1FF }; |