summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 21:57:23 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 21:57:23 +0300
commit9d31d2338950293ec19d9b095fbaa9030899dcb4 (patch)
treee688040d0557c24a2eeb9f6c9c223d949f6f7ef9 /drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
parent635de956a7f5a6ffcb04f29d70630c64c717b56b (diff)
parent4a52dd8fefb45626dace70a63c0738dbd83b7edb (diff)
downloadlinux-9d31d2338950293ec19d9b095fbaa9030899dcb4.tar.xz
Merge tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - bpf: - allow bpf programs calling kernel functions (initially to reuse TCP congestion control implementations) - enable task local storage for tracing programs - remove the need to store per-task state in hash maps, and allow tracing programs access to task local storage previously added for BPF_LSM - add bpf_for_each_map_elem() helper, allowing programs to walk all map elements in a more robust and easier to verify fashion - sockmap: support UDP and cross-protocol BPF_SK_SKB_VERDICT redirection - lpm: add support for batched ops in LPM trie - add BTF_KIND_FLOAT support - mostly to allow use of BTF on s390 which has floats in its headers files - improve BPF syscall documentation and extend the use of kdoc parsing scripts we already employ for bpf-helpers - libbpf, bpftool: support static linking of BPF ELF files - improve support for encapsulation of L2 packets - xdp: restructure redirect actions to avoid a runtime lookup, improving performance by 4-8% in microbenchmarks - xsk: build skb by page (aka generic zerocopy xmit) - improve performance of software AF_XDP path by 33% for devices which don't need headers in the linear skb part (e.g. virtio) - nexthop: resilient next-hop groups - improve path stability on next-hops group changes (incl. offload for mlxsw) - ipv6: segment routing: add support for IPv4 decapsulation - icmp: add support for RFC 8335 extended PROBE messages - inet: use bigger hash table for IP ID generation - tcp: deal better with delayed TX completions - make sure we don't give up on fast TCP retransmissions only because driver is slow in reporting that it completed transmitting the original - tcp: reorder tcp_congestion_ops for better cache locality - mptcp: - add sockopt support for common TCP options - add support for common TCP msg flags - include multiple address ids in RM_ADDR - add reset option support for resetting one subflow - udp: GRO L4 improvements - improve 'forward' / 'frag_list' co-existence with UDP tunnel GRO, allowing the first to take place correctly even for encapsulated UDP traffic - micro-optimize dev_gro_receive() and flow dissection, avoid retpoline overhead on VLAN and TEB GRO - use less memory for sysctls, add a new sysctl type, to allow using u8 instead of "int" and "long" and shrink networking sysctls - veth: allow GRO without XDP - this allows aggregating UDP packets before handing them off to routing, bridge, OvS, etc. - allow specifing ifindex when device is moved to another namespace - netfilter: - nft_socket: add support for cgroupsv2 - nftables: add catch-all set element - special element used to define a default action in case normal lookup missed - use net_generic infra in many modules to avoid allocating per-ns memory unnecessarily - xps: improve the xps handling to avoid potential out-of-bound accesses and use-after-free when XPS change race with other re-configuration under traffic - add a config knob to turn off per-cpu netdev refcnt to catch underflows in testing Device APIs: - add WWAN subsystem to organize the WWAN interfaces better and hopefully start driving towards more unified and vendor- independent APIs - ethtool: - add interface for reading IEEE MIB stats (incl. mlx5 and bnxt support) - allow network drivers to dump arbitrary SFP EEPROM data, current offset+length API was a poor fit for modern SFP which define EEPROM in terms of pages (incl. mlx5 support) - act_police, flow_offload: add support for packet-per-second policing (incl. offload for nfp) - psample: add additional metadata attributes like transit delay for packets sampled from switch HW (and corresponding egress and policy-based sampling in the mlxsw driver) - dsa: improve support for sandwiched LAGs with bridge and DSA - netfilter: - flowtable: use direct xmit in topologies with IP forwarding, bridging, vlans etc. - nftables: counter hardware offload support - Bluetooth: - improvements for firmware download w/ Intel devices - add support for reading AOSP vendor capabilities - add support for virtio transport driver - mac80211: - allow concurrent monitor iface and ethernet rx decap - set priority and queue mapping for injected frames - phy: add support for Clause-45 PHY Loopback - pci/iov: add sysfs MSI-X vector assignment interface to distribute MSI-X resources to VFs (incl. mlx5 support) New hardware/drivers: - dsa: mv88e6xxx: add support for Marvell mv88e6393x - 11-port Ethernet switch with 8x 1-Gigabit Ethernet and 3x 10-Gigabit interfaces. - dsa: support for legacy Broadcom tags used on BCM5325, BCM5365 and BCM63xx switches - Microchip KSZ8863 and KSZ8873; 3x 10/100Mbps Ethernet switches - ath11k: support for QCN9074 a 802.11ax device - Bluetooth: Broadcom BCM4330 and BMC4334 - phy: Marvell 88X2222 transceiver support - mdio: add BCM6368 MDIO mux bus controller - r8152: support RTL8153 and RTL8156 (USB Ethernet) chips - mana: driver for Microsoft Azure Network Adapter (MANA) - Actions Semi Owl Ethernet MAC - can: driver for ETAS ES58X CAN/USB interfaces Pure driver changes: - add XDP support to: enetc, igc, stmmac - add AF_XDP support to: stmmac - virtio: - page_to_skb() use build_skb when there's sufficient tailroom (21% improvement for 1000B UDP frames) - support XDP even without dedicated Tx queues - share the Tx queues with the stack when necessary - mlx5: - flow rules: add support for mirroring with conntrack, matching on ICMP, GTP, flex filters and more - support packet sampling with flow offloads - persist uplink representor netdev across eswitch mode changes - allow coexistence of CQE compression and HW time-stamping - add ethtool extended link error state reporting - ice, iavf: support flow filters, UDP Segmentation Offload - dpaa2-switch: - move the driver out of staging - add spanning tree (STP) support - add rx copybreak support - add tc flower hardware offload on ingress traffic - ionic: - implement Rx page reuse - support HW PTP time-stamping - octeon: support TC hardware offloads - flower matching on ingress and egress ratelimitting. - stmmac: - add RX frame steering based on VLAN priority in tc flower - support frame preemption (FPE) - intel: add cross time-stamping freq difference adjustment - ocelot: - support forwarding of MRP frames in HW - support multiple bridges - support PTP Sync one-step timestamping - dsa: mv88e6xxx, dpaa2-switch: offload bridge port flags like learning, flooding etc. - ipa: add IPA v4.5, v4.9 and v4.11 support (Qualcomm SDX55, SM8350, SC7280 SoCs) - mt7601u: enable TDLS support - mt76: - add support for 802.3 rx frames (mt7915/mt7615) - mt7915 flash pre-calibration support - mt7921/mt7663 runtime power management fixes" * tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2451 commits) net: selftest: fix build issue if INET is disabled net: netrom: nr_in: Remove redundant assignment to ns net: tun: Remove redundant assignment to ret net: phy: marvell: add downshift support for M88E1240 net: dsa: ksz: Make reg_mib_cnt a u8 as it never exceeds 255 net/sched: act_ct: Remove redundant ct get and check icmp: standardize naming of RFC 8335 PROBE constants bpf, selftests: Update array map tests for per-cpu batched ops bpf: Add batched ops support for percpu array bpf: Implement formatted output helpers with bstr_printf seq_file: Add a seq_bprintf function sfc: adjust efx->xdp_tx_queue_count with the real number of initialized queues net:nfc:digital: Fix a double free in digital_tg_recv_dep_req net: fix a concurrency bug in l2tp_tunnel_register() net/smc: Remove redundant assignment to rc mpls: Remove redundant assignment to err llc2: Remove redundant assignment to rc net/tls: Remove redundant initialization of record rds: Remove redundant assignment to nr_sig dt-bindings: net: mdio-gpio: add compatible for microchip,mdio-smi0 ...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c213
1 files changed, 197 insertions, 16 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 4ef12e3e021a..26d01adbedad 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -49,8 +49,14 @@ enum {
#define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT
enum {
+ /* Packet was mirrored from ingress. */
+ MLXSW_SP_MIRROR_REASON_INGRESS = 1,
+ /* Packet was mirrored from policy engine. */
+ MLXSW_SP_MIRROR_REASON_POLICY_ENGINE = 2,
/* Packet was early dropped. */
MLXSW_SP_MIRROR_REASON_INGRESS_WRED = 9,
+ /* Packet was mirrored from egress. */
+ MLXSW_SP_MIRROR_REASON_EGRESS = 14,
};
static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
@@ -106,7 +112,7 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port,
static void mlxsw_sp_rx_acl_drop_listener(struct sk_buff *skb, u8 local_port,
void *trap_ctx)
{
- u32 cookie_index = mlxsw_skb_cb(skb)->cookie_index;
+ u32 cookie_index = mlxsw_skb_cb(skb)->rx_md_info.cookie_index;
const struct flow_action_cookie *fa_cookie;
struct devlink_port *in_devlink_port;
struct mlxsw_sp_port *mlxsw_sp_port;
@@ -202,21 +208,175 @@ static void mlxsw_sp_rx_ptp_listener(struct sk_buff *skb, u8 local_port,
mlxsw_sp_ptp_receive(mlxsw_sp, skb, local_port);
}
+static struct mlxsw_sp_port *
+mlxsw_sp_sample_tx_port_get(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_rx_md_info *rx_md_info)
+{
+ u8 local_port;
+
+ if (!rx_md_info->tx_port_valid)
+ return NULL;
+
+ if (rx_md_info->tx_port_is_lag)
+ local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core,
+ rx_md_info->tx_lag_id,
+ rx_md_info->tx_lag_port_index);
+ else
+ local_port = rx_md_info->tx_sys_port;
+
+ if (local_port >= mlxsw_core_max_ports(mlxsw_sp->core))
+ return NULL;
+
+ return mlxsw_sp->ports[local_port];
+}
+
+/* The latency units are determined according to MOGCR.mirror_latency_units. It
+ * defaults to 64 nanoseconds.
+ */
+#define MLXSW_SP_MIRROR_LATENCY_SHIFT 6
+
+static void mlxsw_sp_psample_md_init(struct mlxsw_sp *mlxsw_sp,
+ struct psample_metadata *md,
+ struct sk_buff *skb, int in_ifindex,
+ bool truncate, u32 trunc_size)
+{
+ struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+
+ md->trunc_size = truncate ? trunc_size : skb->len;
+ md->in_ifindex = in_ifindex;
+ mlxsw_sp_port = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info);
+ md->out_ifindex = mlxsw_sp_port && mlxsw_sp_port->dev ?
+ mlxsw_sp_port->dev->ifindex : 0;
+ md->out_tc_valid = rx_md_info->tx_tc_valid;
+ md->out_tc = rx_md_info->tx_tc;
+ md->out_tc_occ_valid = rx_md_info->tx_congestion_valid;
+ md->out_tc_occ = rx_md_info->tx_congestion;
+ md->latency_valid = rx_md_info->latency_valid;
+ md->latency = rx_md_info->latency;
+ md->latency <<= MLXSW_SP_MIRROR_LATENCY_SHIFT;
+}
+
static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port,
void *trap_ctx)
{
struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+ struct mlxsw_sp_sample_trigger trigger;
+ struct mlxsw_sp_sample_params *params;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct psample_metadata md = {};
+ int err;
+
+ err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+ if (err)
+ return;
+
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!mlxsw_sp_port)
+ goto out;
+
+ trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS;
+ trigger.local_port = local_port;
+ params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger);
+ if (!params)
+ goto out;
+
+ /* The psample module expects skb->data to point to the start of the
+ * Ethernet header.
+ */
+ skb_push(skb, ETH_HLEN);
+ mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb,
+ mlxsw_sp_port->dev->ifindex, params->truncate,
+ params->trunc_size);
+ psample_sample_packet(params->psample_group, skb, params->rate, &md);
+out:
+ consume_skb(skb);
+}
+
+static void mlxsw_sp_rx_sample_tx_listener(struct sk_buff *skb, u8 local_port,
+ void *trap_ctx)
+{
+ struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info;
+ struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+ struct mlxsw_sp_port *mlxsw_sp_port, *mlxsw_sp_port_tx;
+ struct mlxsw_sp_sample_trigger trigger;
+ struct mlxsw_sp_sample_params *params;
+ struct psample_metadata md = {};
+ int err;
+
+ /* Locally generated packets are not reported from the policy engine
+ * trigger, so do not report them from the egress trigger as well.
+ */
+ if (local_port == MLXSW_PORT_CPU_PORT)
+ goto out;
+
+ err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+ if (err)
+ return;
+
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!mlxsw_sp_port)
+ goto out;
+
+ /* Packet was sampled from Tx, so we need to retrieve the sample
+ * parameters based on the Tx port and not the Rx port.
+ */
+ mlxsw_sp_port_tx = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info);
+ if (!mlxsw_sp_port_tx)
+ goto out;
+
+ trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS;
+ trigger.local_port = mlxsw_sp_port_tx->local_port;
+ params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger);
+ if (!params)
+ goto out;
+
+ /* The psample module expects skb->data to point to the start of the
+ * Ethernet header.
+ */
+ skb_push(skb, ETH_HLEN);
+ mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb,
+ mlxsw_sp_port->dev->ifindex, params->truncate,
+ params->trunc_size);
+ psample_sample_packet(params->psample_group, skb, params->rate, &md);
+out:
+ consume_skb(skb);
+}
+
+static void mlxsw_sp_rx_sample_acl_listener(struct sk_buff *skb, u8 local_port,
+ void *trap_ctx)
+{
+ struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+ struct mlxsw_sp_sample_trigger trigger = {
+ .type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE,
+ };
+ struct mlxsw_sp_sample_params *params;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct psample_metadata md = {};
int err;
err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
if (err)
return;
- /* The sample handler expects skb->data to point to the start of the
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!mlxsw_sp_port)
+ goto out;
+
+ params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger);
+ if (!params)
+ goto out;
+
+ /* The psample module expects skb->data to point to the start of the
* Ethernet header.
*/
skb_push(skb, ETH_HLEN);
- mlxsw_sp_sample_receive(mlxsw_sp, skb, local_port);
+ mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb,
+ mlxsw_sp_port->dev->ifindex, params->truncate,
+ params->trunc_size);
+ psample_sample_packet(params->psample_group, skb, params->rate, &md);
+out:
+ consume_skb(skb);
}
#define MLXSW_SP_TRAP_DROP(_id, _group_id) \
@@ -464,11 +624,6 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
.priority = 2,
},
{
- .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
- .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
- .priority = 0,
- },
- {
.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_TRAP, 18),
.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING,
.priority = 4,
@@ -993,14 +1148,6 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
},
},
{
- .trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
- MIRROR),
- .listeners_arr = {
- MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE,
- MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD),
- },
- },
- {
.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_TRAP, ACL_TRAP, TRAP),
.listeners_arr = {
MLXSW_SP_RXL_NO_MARK(ACL0, FLOW_LOGGING, TRAP_TO_CPU,
@@ -1709,10 +1856,23 @@ int mlxsw_sp_trap_group_policer_hw_id_get(struct mlxsw_sp *mlxsw_sp, u16 id,
static const struct mlxsw_sp_trap_group_item
mlxsw_sp1_trap_group_items_arr[] = {
+ {
+ .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
+ .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
+ .priority = 0,
+ },
};
static const struct mlxsw_sp_trap_item
mlxsw_sp1_trap_items_arr[] = {
+ {
+ .trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
+ MIRROR),
+ .listeners_arr = {
+ MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE,
+ MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD),
+ },
+ },
};
static int
@@ -1749,6 +1909,12 @@ mlxsw_sp2_trap_group_items_arr[] = {
.priority = 0,
.fixed_policer = true,
},
+ {
+ .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
+ .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
+ .priority = 0,
+ .fixed_policer = true,
+ },
};
static const struct mlxsw_sp_trap_item
@@ -1760,6 +1926,21 @@ mlxsw_sp2_trap_items_arr[] = {
},
.is_source = true,
},
+ {
+ .trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
+ MIRROR),
+ .listeners_arr = {
+ MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_listener, 1,
+ SP_PKT_SAMPLE,
+ MLXSW_SP_MIRROR_REASON_INGRESS),
+ MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_tx_listener, 1,
+ SP_PKT_SAMPLE,
+ MLXSW_SP_MIRROR_REASON_EGRESS),
+ MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_acl_listener, 1,
+ SP_PKT_SAMPLE,
+ MLXSW_SP_MIRROR_REASON_POLICY_ENGINE),
+ },
+ },
};
static int