summaryrefslogtreecommitdiff
path: root/net/bpf/test_run.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 02:07:23 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 02:07:23 +0300
commit6e98b09da931a00bf4e0477d0fa52748bf28fcce (patch)
tree9c658ed95add5693f42f29f63df80a2ede3f6ec2 /net/bpf/test_run.c
parentb68ee1c6131c540a62ecd443be89c406401df091 (diff)
parent9b78d919632b7149d311aaad5a977e4b48b10321 (diff)
downloadlinux-6e98b09da931a00bf4e0477d0fa52748bf28fcce.tar.xz
Merge tag 'net-next-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "Core: - Introduce a config option to tweak MAX_SKB_FRAGS. Increasing the default value allows for better BIG TCP performances - Reduce compound page head access for zero-copy data transfers - RPS/RFS improvements, avoiding unneeded NET_RX_SOFTIRQ when possible - Threaded NAPI improvements, adding defer skb free support and unneeded softirq avoidance - Address dst_entry reference count scalability issues, via false sharing avoidance and optimize refcount tracking - Add lockless accesses annotation to sk_err[_soft] - Optimize again the skb struct layout - Extends the skb drop reasons to make it usable by multiple subsystems - Better const qualifier awareness for socket casts BPF: - Add skb and XDP typed dynptrs which allow BPF programs for more ergonomic and less brittle iteration through data and variable-sized accesses - Add a new BPF netfilter program type and minimal support to hook BPF programs to netfilter hooks such as prerouting or forward - Add more precise memory usage reporting for all BPF map types - Adds support for using {FOU,GUE} encap with an ipip device operating in collect_md mode and add a set of BPF kfuncs for controlling encap params - Allow BPF programs to detect at load time whether a particular kfunc exists or not, and also add support for this in light skeleton - Bigger batch of BPF verifier improvements to prepare for upcoming BPF open-coded iterators allowing for less restrictive looping capabilities - Rework RCU enforcement in the verifier, add kptr_rcu and enforce BPF programs to NULL-check before passing such pointers into kfunc - Add support for kptrs in percpu hashmaps, percpu LRU hashmaps and in local storage maps - Enable RCU semantics for task BPF kptrs and allow referenced kptr tasks to be stored in BPF maps - Add support for refcounted local kptrs to the verifier for allowing shared ownership, useful for adding a node to both the BPF list and rbtree - Add BPF verifier support for ST instructions in convert_ctx_access() which will help new -mcpu=v4 clang flag to start emitting them - Add ARM32 USDT support to libbpf - Improve bpftool's visual program dump which produces the control flow graph in a DOT format by adding C source inline annotations Protocols: - IPv4: Allow adding to IPv4 address a 'protocol' tag. Such value indicates the provenance of the IP address - IPv6: optimize route lookup, dropping unneeded R/W lock acquisition - Add the handshake upcall mechanism, allowing the user-space to implement generic TLS handshake on kernel's behalf - Bridge: support per-{Port, VLAN} neighbor suppression, increasing resilience to nodes failures - SCTP: add support for Fair Capacity and Weighted Fair Queueing schedulers - MPTCP: delay first subflow allocation up to its first usage. This will allow for later better LSM interaction - xfrm: Remove inner/outer modes from input/output path. These are not needed anymore - WiFi: - reduced neighbor report (RNR) handling for AP mode - HW timestamping support - support for randomized auth/deauth TA for PASN privacy - per-link debugfs for multi-link - TC offload support for mac80211 drivers - mac80211 mesh fast-xmit and fast-rx support - enable Wi-Fi 7 (EHT) mesh support Netfilter: - Add nf_tables 'brouting' support, to force a packet to be routed instead of being bridged - Update bridge netfilter and ovs conntrack helpers to handle IPv6 Jumbo packets properly, i.e. fetch the packet length from hop-by-hop extension header. This is needed for BIT TCP support - The iptables 32bit compat interface isn't compiled in by default anymore - Move ip(6)tables builtin icmp matches to the udptcp one. This has the advantage that icmp/icmpv6 match doesn't load the iptables/ip6tables modules anymore when iptables-nft is used - Extended netlink error report for netdevice in flowtables and netdev/chains. Allow for incrementally add/delete devices to netdev basechain. Allow to create netdev chain without device Driver API: - Remove redundant Device Control Error Reporting Enable, as PCI core has already error reporting enabled at enumeration time - Move Multicast DB netlink handlers to core, allowing devices other then bridge to use them - Allow the page_pool to directly recycle the pages from safely localized NAPI - Implement lockless TX queue stop/wake combo macros, allowing for further code de-duplication and sanitization - Add YNL support for user headers and struct attrs - Add partial YNL specification for devlink - Add partial YNL specification for ethtool - Add tc-mqprio and tc-taprio support for preemptible traffic classes - Add tx push buf len param to ethtool, specifies the maximum number of bytes of a transmitted packet a driver can push directly to the underlying device - Add basic LED support for switch/phy - Add NAPI documentation, stop relaying on external links - Convert dsa_master_ioctl() to netdev notifier. This is a preparatory work to make the hardware timestamping layer selectable by user space - Add transceiver support and improve the error messages for CAN-FD controllers New hardware / drivers: - Ethernet: - AMD/Pensando core device support - MediaTek MT7981 SoC - MediaTek MT7988 SoC - Broadcom BCM53134 embedded switch - Texas Instruments CPSW9G ethernet switch - Qualcomm EMAC3 DWMAC ethernet - StarFive JH7110 SoC - NXP CBTX ethernet PHY - WiFi: - Apple M1 Pro/Max devices - RealTek rtl8710bu/rtl8188gu - RealTek rtl8822bs, rtl8822cs and rtl8821cs SDIO chipset - Bluetooth: - Realtek RTL8821CS, RTL8851B, RTL8852BS - Mediatek MT7663, MT7922 - NXP w8997 - Actions Semi ATS2851 - QTI WCN6855 - Marvell 88W8997 - Can: - STMicroelectronics bxcan stm32f429 Drivers: - Ethernet NICs: - Intel (1G, icg): - add tracking and reporting of QBV config errors - add support for configuring max SDU for each Tx queue - Intel (100G, ice): - refactor mailbox overflow detection to support Scalable IOV - GNSS interface optimization - Intel (i40e): - support XDP multi-buffer - nVidia/Mellanox: - add the support for linux bridge multicast offload - enable TC offload for egress and engress MACVLAN over bond - add support for VxLAN GBP encap/decap flows offload - extend packet offload to fully support libreswan - support tunnel mode in mlx5 IPsec packet offload - extend XDP multi-buffer support - support MACsec VLAN offload - add support for dynamic msix vectors allocation - drop RX page_cache and fully use page_pool - implement thermal zone to report NIC temperature - Netronome/Corigine: - add support for multi-zone conntrack offload - Solarflare/Xilinx: - support offloading TC VLAN push/pop actions to the MAE - support TC decap rules - support unicast PTP - Other NICs: - Broadcom (bnxt): enforce software based freq adjustments only on shared PHC NIC - RealTek (r8169): refactor to addess ASPM issues during NAPI poll - Micrel (lan8841): add support for PTP_PF_PEROUT - Cadence (macb): enable PTP unicast - Engleder (tsnep): add XDP socket zero-copy support - virtio-net: implement exact header length guest feature - veth: add page_pool support for page recycling - vxlan: add MDB data path support - gve: add XDP support for GQI-QPL format - geneve: accept every ethertype - macvlan: allow some packets to bypass broadcast queue - mana: add support for jumbo frame - Ethernet high-speed switches: - Microchip (sparx5): Add support for TC flower templates - Ethernet embedded switches: - Broadcom (b54): - configure 6318 and 63268 RGMII ports - Marvell (mv88e6xxx): - faster C45 bus scan - Microchip: - lan966x: - add support for IS1 VCAP - better TX/RX from/to CPU performances - ksz9477: add ETS Qdisc support - ksz8: enhance static MAC table operations and error handling - sama7g5: add PTP capability - NXP (ocelot): - add support for external ports - add support for preemptible traffic classes - Texas Instruments: - add CPSWxG SGMII support for J7200 and J721E - Intel WiFi (iwlwifi): - preparation for Wi-Fi 7 EHT and multi-link support - EHT (Wi-Fi 7) sniffer support - hardware timestamping support for some devices/firwmares - TX beacon protection on newer hardware - Qualcomm 802.11ax WiFi (ath11k): - MU-MIMO parameters support - ack signal support for management packets - RealTek WiFi (rtw88): - SDIO bus support - better support for some SDIO devices (e.g. MAC address from efuse) - RealTek WiFi (rtw89): - HW scan support for 8852b - better support for 6 GHz scanning - support for various newer firmware APIs - framework firmware backwards compatibility - MediaTek WiFi (mt76): - P2P support - mesh A-MSDU support - EHT (Wi-Fi 7) support - coredump support" * tag 'net-next-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2078 commits) net: phy: hide the PHYLIB_LEDS knob net: phy: marvell-88x2222: remove unnecessary (void*) conversions tcp/udp: Fix memleaks of sk and zerocopy skbs with TX timestamp. net: amd: Fix link leak when verifying config failed net: phy: marvell: Fix inconsistent indenting in led_blink_set lan966x: Don't use xdp_frame when action is XDP_TX tsnep: Add XDP socket zero-copy TX support tsnep: Add XDP socket zero-copy RX support tsnep: Move skb receive action to separate function tsnep: Add functions for queue enable/disable tsnep: Rework TX/RX queue initialization tsnep: Replace modulo operation with mask net: phy: dp83867: Add led_brightness_set support net: phy: Fix reading LED reg property drivers: nfc: nfcsim: remove return value check of `dev_dir` net: phy: dp83867: Remove unnecessary (void*) conversions net: ethtool: coalesce: try to make user settings stick twice net: mana: Check if netdev/napi_alloc_frag returns single page net: mana: Rename mana_refill_rxoob and remove some empty lines net: veth: add page_pool stats ...
Diffstat (limited to 'net/bpf/test_run.c')
-rw-r--r--net/bpf/test_run.c207
1 files changed, 189 insertions, 18 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f81b24320a36..e79e3a415ca9 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -19,7 +19,9 @@
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
+#include <linux/netfilter.h>
#include <net/xdp.h>
+#include <net/netfilter/nf_bpf_link.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
@@ -215,6 +217,16 @@ static void xdp_test_run_teardown(struct xdp_test_data *xdp)
kfree(xdp->skbs);
}
+static bool frame_was_changed(const struct xdp_page_head *head)
+{
+ /* xdp_scrub_frame() zeroes the data pointer, flags is the last field,
+ * i.e. has the highest chances to be overwritten. If those two are
+ * untouched, it's most likely safe to skip the context reset.
+ */
+ return head->frame->data != head->orig_ctx.data ||
+ head->frame->flags != head->orig_ctx.flags;
+}
+
static bool ctx_was_changed(struct xdp_page_head *head)
{
return head->orig_ctx.data != head->ctx.data ||
@@ -224,7 +236,7 @@ static bool ctx_was_changed(struct xdp_page_head *head)
static void reset_ctx(struct xdp_page_head *head)
{
- if (likely(!ctx_was_changed(head)))
+ if (likely(!frame_was_changed(head) && !ctx_was_changed(head)))
return;
head->ctx.data = head->orig_ctx.data;
@@ -538,6 +550,11 @@ int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
return (long)arg->a;
}
+__bpf_kfunc u32 bpf_fentry_test9(u32 *a)
+{
+ return *a;
+}
+
__bpf_kfunc int bpf_modify_return_test(int a, int *b)
{
*b += 1;
@@ -567,6 +584,11 @@ long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
return (long)a + (long)b + (long)c + d;
}
+int noinline bpf_fentry_shadow_test(int a)
+{
+ return a + 1;
+}
+
struct prog_test_member1 {
int a;
};
@@ -598,6 +620,11 @@ bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
return &prog_test_struct;
}
+__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p)
+{
+ WARN_ON_ONCE(1);
+}
+
__bpf_kfunc struct prog_test_member *
bpf_kfunc_call_memb_acquire(void)
{
@@ -607,9 +634,6 @@ bpf_kfunc_call_memb_acquire(void)
__bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
{
- if (!p)
- return;
-
refcount_dec(&p->cnt);
}
@@ -657,17 +681,6 @@ __bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p)
{
}
-__bpf_kfunc struct prog_test_ref_kfunc *
-bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b)
-{
- struct prog_test_ref_kfunc *p = READ_ONCE(*pp);
-
- if (!p)
- return NULL;
- refcount_inc(&p->cnt);
- return p;
-}
-
struct prog_test_pass1 {
int x0;
struct {
@@ -744,6 +757,7 @@ __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
{
+ /* p != NULL, but p->cnt could be 0 */
}
__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
@@ -781,7 +795,6 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_kptr_get, KF_ACQUIRE | KF_RET_NULL | KF_KPTR_GET)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
@@ -791,9 +804,10 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
BTF_SET8_END(test_sk_check_kfunc_ids)
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
@@ -843,7 +857,8 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 ||
bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 ||
- bpf_fentry_test8(&arg) != 0)
+ bpf_fentry_test8(&arg) != 0 ||
+ bpf_fentry_test9(&retval) != 0)
goto out;
break;
case BPF_MODIFY_RETURN:
@@ -1678,6 +1693,162 @@ out:
return err;
}
+static int verify_and_copy_hook_state(struct nf_hook_state *state,
+ const struct nf_hook_state *user,
+ struct net_device *dev)
+{
+ if (user->in || user->out)
+ return -EINVAL;
+
+ if (user->net || user->sk || user->okfn)
+ return -EINVAL;
+
+ switch (user->pf) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ switch (state->hook) {
+ case NF_INET_PRE_ROUTING:
+ state->in = dev;
+ break;
+ case NF_INET_LOCAL_IN:
+ state->in = dev;
+ break;
+ case NF_INET_FORWARD:
+ state->in = dev;
+ state->out = dev;
+ break;
+ case NF_INET_LOCAL_OUT:
+ state->out = dev;
+ break;
+ case NF_INET_POST_ROUTING:
+ state->out = dev;
+ break;
+ }
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ state->pf = user->pf;
+ state->hook = user->hook;
+
+ return 0;
+}
+
+static __be16 nfproto_eth(int nfproto)
+{
+ switch (nfproto) {
+ case NFPROTO_IPV4:
+ return htons(ETH_P_IP);
+ case NFPROTO_IPV6:
+ break;
+ }
+
+ return htons(ETH_P_IPV6);
+}
+
+int bpf_prog_test_run_nf(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct net_device *dev = net->loopback_dev;
+ struct nf_hook_state *user_ctx, hook_state = {
+ .pf = NFPROTO_IPV4,
+ .hook = NF_INET_LOCAL_OUT,
+ };
+ u32 size = kattr->test.data_size_in;
+ u32 repeat = kattr->test.repeat;
+ struct bpf_nf_ctx ctx = {
+ .state = &hook_state,
+ };
+ struct sk_buff *skb = NULL;
+ u32 retval, duration;
+ void *data;
+ int ret;
+
+ if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
+ return -EINVAL;
+
+ if (size < sizeof(struct iphdr))
+ return -EINVAL;
+
+ data = bpf_test_init(kattr, kattr->test.data_size_in, size,
+ NET_SKB_PAD + NET_IP_ALIGN,
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ if (!repeat)
+ repeat = 1;
+
+ user_ctx = bpf_ctx_init(kattr, sizeof(struct nf_hook_state));
+ if (IS_ERR(user_ctx)) {
+ kfree(data);
+ return PTR_ERR(user_ctx);
+ }
+
+ if (user_ctx) {
+ ret = verify_and_copy_hook_state(&hook_state, user_ctx, dev);
+ if (ret)
+ goto out;
+ }
+
+ skb = slab_build_skb(data);
+ if (!skb) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ data = NULL; /* data released via kfree_skb */
+
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+ __skb_put(skb, size);
+
+ ret = -EINVAL;
+
+ if (hook_state.hook != NF_INET_LOCAL_OUT) {
+ if (size < ETH_HLEN + sizeof(struct iphdr))
+ goto out;
+
+ skb->protocol = eth_type_trans(skb, dev);
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (hook_state.pf == NFPROTO_IPV4)
+ break;
+ goto out;
+ case htons(ETH_P_IPV6):
+ if (size < ETH_HLEN + sizeof(struct ipv6hdr))
+ goto out;
+ if (hook_state.pf == NFPROTO_IPV6)
+ break;
+ goto out;
+ default:
+ ret = -EPROTO;
+ goto out;
+ }
+
+ skb_reset_network_header(skb);
+ } else {
+ skb->protocol = nfproto_eth(hook_state.pf);
+ }
+
+ ctx.skb = skb;
+
+ ret = bpf_test_run(prog, &ctx, repeat, &retval, &duration, false);
+ if (ret)
+ goto out;
+
+ ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
+
+out:
+ kfree(user_ctx);
+ kfree_skb(skb);
+ kfree(data);
+ return ret;
+}
+
static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
.owner = THIS_MODULE,
.set = &test_sk_check_kfunc_ids,