From 6001896f00984d317fb75160ba05c4a885fbe2a0 Mon Sep 17 00:00:00 2001 From: Sun Jian Date: Fri, 12 Jun 2026 19:40:31 +0800 Subject: bpf: Run generic devmap egress prog on private skb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generic XDP devmap multi redirect uses skb_clone() for intermediate destinations and sends the last destination with the original skb. This can leave multiple destinations sharing the same packet data. This becomes visible after generic devmap egress-program support was added: a devmap egress program may mutate packet data, and another destination sharing the same data can observe that mutation. Native XDP broadcast redirect does not have this issue because xdpf_clone() copies the frame data for each destination. Generic XDP should provide the same per-destination isolation before running a devmap egress program. Fix this by making cloned skbs private before running the generic devmap egress program. Use skb_copy() instead of skb_unshare() so allocation failure does not consume the skb and the existing caller error paths keep their ownership semantics. Fixes: 2ea5eabaf04a ("bpf: devmap: Implement devmap prog execution for generic XDP") Suggested-by: Jiayuan Chen Suggested-by: Jakub Kicinski Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Sun Jian Link: https://lore.kernel.org/r/20260612114032.244616-2-sun.jian.kdev@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 5b9eac5342a9..dc7b859e8bbf 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -710,6 +710,18 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, if (unlikely(err)) return err; + if (dst->xdp_prog && skb_cloned(skb)) { + struct sk_buff *nskb; + + nskb = skb_copy(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + nskb->mac_len = skb->mac_len; + consume_skb(skb); + skb = nskb; + } + /* Redirect has already succeeded semantically at this point, so we just * return 0 even if packet is dropped. Helper below takes care of * freeing skb. -- cgit v1.2.3 From f0eff94d07cda9bd71754d95af4301cd437020b8 Mon Sep 17 00:00:00 2001 From: Sun Jian Date: Fri, 12 Jun 2026 19:40:32 +0800 Subject: selftests/bpf: Cover generic devmap egress last-dst rewrite Strengthen xdp_veth_egress to check that each destination observes the MAC selected for its own egress ifindex, instead of only checking that the observed MAC differs from a single magic value. Add a generic XDP last-destination test where an earlier destination does not have a devmap egress program while the final destination does. This covers the case where the final destination runs on the original skb and could otherwise rewrite packet data still shared with an earlier cloned skb. Use deterministic DEVMAP_HASH keys for the egress map so the intended last destination is stable. Initialize the result map with a sentinel value and check that store_mac_1 overwrites it before checking that the earlier destination did not observe the MAC written by the final destination. Suggested-by: Jiayuan Chen Signed-off-by: Sun Jian Link: https://lore.kernel.org/r/20260612114032.244616-3-sun.jian.kdev@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/test_xdp_veth.c | 166 ++++++++++++++++++++- 1 file changed, 163 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c index 3e98a1665936..1675b32753a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c +++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c @@ -456,7 +456,11 @@ static void xdp_veth_egress(u32 flags) .remote_flags = flags, } }; - const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}; + const unsigned char egress_macs[VETH_PAIRS_COUNT][ETH_ALEN] = { + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x01 }, + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x02 }, + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x03 }, + }; struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB]; struct xdp_redirect_map *xdp_redirect_map; @@ -512,7 +516,13 @@ static void xdp_veth_egress(u32 flags) &net_config, prog_cfg, i)) goto destroy_xdp_redirect_map; - err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0); + { + __be64 mac = 0; + + memcpy(&mac, egress_macs[i], ETH_ALEN); + err = bpf_map_update_elem(mac_map, &ifindex, &mac, 0); + } + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto destroy_xdp_redirect_map; @@ -531,15 +541,162 @@ static void xdp_veth_egress(u32 flags) for (i = 0; i < 2; i++) { u32 key = i; + __be64 expected = 0; u64 res; err = bpf_map_lookup_elem(res_map, &key, &res); if (!ASSERT_OK(err, "get MAC res")) goto destroy_xdp_redirect_map; - ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac"); + /* store_mac_1/2 run on the second/third remote veths. */ + memcpy(&expected, egress_macs[i + 1], ETH_ALEN); + ASSERT_EQ(res, expected, "compare mac"); + } + +destroy_xdp_redirect_map: + close_netns(nstoken); + xdp_redirect_map__destroy(xdp_redirect_map); +destroy_xdp_redirect_multi_kern: + xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern); +destroy_xdp_dummy: + xdp_dummy__destroy(xdp_dummy); + + cleanup_network(&net_config); +} + +static void xdp_veth_egress_last_dst(u32 flags) +{ + struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = { + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "xdp_dummy_prog", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "store_mac_1", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "xdp_dummy_prog", + .local_flags = flags, + .remote_flags = flags, + } + }; + const unsigned char egress_macs[VETH_PAIRS_COUNT][ETH_ALEN] = { + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x01 }, + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x02 }, + { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x03 }, + }; + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; + struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB]; + struct xdp_redirect_map *xdp_redirect_map; + struct net_configuration net_config = {}; + int mac_map, egress_map, res_map; + struct nstoken *nstoken = NULL; + struct xdp_dummy *xdp_dummy; + __be64 sentinel_mac = 0; + __be64 last_mac = 0; + __be64 res; + u32 key; + int err; + int i; + + xdp_dummy = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load")) + return; + + xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load")) + goto destroy_xdp_dummy; + + xdp_redirect_map = xdp_redirect_map__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) + goto destroy_xdp_redirect_multi_kern; + + if (!ASSERT_OK(create_network(&net_config), "create network")) + goto destroy_xdp_redirect_map; + + mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map); + if (!ASSERT_OK_FD(mac_map, "open mac_map")) + goto destroy_xdp_redirect_map; + + egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress); + if (!ASSERT_OK_FD(egress_map, "open map_egress")) + goto destroy_xdp_redirect_map; + + bpf_objs[0] = xdp_dummy->obj; + bpf_objs[1] = xdp_redirect_multi_kern->obj; + bpf_objs[2] = xdp_redirect_map->obj; + + nstoken = open_netns(net_config.ns0_name); + if (!ASSERT_OK_PTR(nstoken, "open NS0")) + goto destroy_xdp_redirect_map; + + for (i = 0; i < VETH_PAIRS_COUNT; i++) { + struct bpf_devmap_val devmap_val = {}; + int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth); + u32 key = i; + + SYS(destroy_xdp_redirect_map, + "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s", + net_config.veth_cfg[i].namespace, IP_NEIGH, + net_config.veth_cfg[i].remote_veth); + + if (attach_programs_to_veth_pair(bpf_objs, VETH_EGRESS_SKEL_NB, + &net_config, prog_cfg, i)) + goto destroy_xdp_redirect_map; + + { + __be64 mac = 0; + + memcpy(&mac, egress_macs[i], ETH_ALEN); + err = bpf_map_update_elem(mac_map, &ifindex, &mac, 0); + } + + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto destroy_xdp_redirect_map; + + devmap_val.ifindex = ifindex; + devmap_val.bpf_prog.fd = -1; + + if (i == VETH_PAIRS_COUNT - 1) + devmap_val.bpf_prog.fd = + bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog); + + err = bpf_map_update_elem(egress_map, &key, &devmap_val, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto destroy_xdp_redirect_map; } + res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac); + if (!ASSERT_OK_FD(res_map, "open rx_map")) + goto destroy_xdp_redirect_map; + + memcpy(&sentinel_mac, egress_macs[VETH_PAIRS_COUNT - 1], ETH_ALEN); + memcpy(&last_mac, egress_macs[VETH_PAIRS_COUNT - 1], ETH_ALEN); + + key = 0; + err = bpf_map_update_elem(res_map, &key, &sentinel_mac, 0); + if (!ASSERT_OK(err, "init rx mac")) + goto destroy_xdp_redirect_map; + + SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ", + net_config.veth_cfg[0].namespace, IP_NEIGH); + + err = bpf_map_lookup_elem(res_map, &key, &res); + if (!ASSERT_OK(err, "get MAC res")) + goto destroy_xdp_redirect_map; + + if (!ASSERT_NEQ(res, sentinel_mac, "rx_mac overwritten by store_mac_1")) + goto destroy_xdp_redirect_map; + + if (!ASSERT_NEQ(res, last_mac, "earlier dst not rewritten by last dst")) + goto destroy_xdp_redirect_map; + destroy_xdp_redirect_map: close_netns(nstoken); xdp_redirect_map__destroy(xdp_redirect_map); @@ -596,4 +753,7 @@ void test_xdp_veth_egress(void) if (test__start_subtest("SKB_MODE/egress")) xdp_veth_egress(XDP_FLAGS_SKB_MODE); + + if (test__start_subtest("SKB_MODE/egress_last_dst")) + xdp_veth_egress_last_dst(XDP_FLAGS_SKB_MODE); } -- cgit v1.2.3