From 967534e57c4439ba43b31f4af4cb85e84c86e6b7 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:47 +0100 Subject: selftests/bpf: Verify skb metadata in BPF instead of userspace Move metadata verification into the BPF TC programs. Previously, userspace read metadata from a map and verified it once at test end. Now TC programs compare metadata directly using __builtin_memcmp() and set a test_pass flag. This enables verification at multiple points during test execution rather than a single final check. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-10-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 52 ++++--------- tools/testing/selftests/bpf/progs/test_xdp_meta.c | 88 +++++++++++----------- 2 files changed, 57 insertions(+), 83 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 178292d1251a..93a1fbe6a4fd 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -171,33 +171,6 @@ static int write_test_packet(int tap_fd) return 0; } -static void assert_test_result(const struct bpf_map *result_map) -{ - int err; - __u32 map_key = 0; - __u8 map_value[TEST_PAYLOAD_LEN]; - - err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key), - &map_value, TEST_PAYLOAD_LEN, BPF_ANY); - if (!ASSERT_OK(err, "lookup test_result")) - return; - - ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN, - "test_result map contains test payload"); -} - -static bool clear_test_result(struct bpf_map *result_map) -{ - const __u8 v[sizeof(test_payload)] = {}; - const __u32 k = 0; - int err; - - err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY); - ASSERT_OK(err, "update test_result"); - - return err == 0; -} - void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -270,11 +243,13 @@ void test_xdp_context_veth(void) if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx")) goto close; + skel->bss->test_pass = false; + ret = send_test_packet(tx_ifindex); if (!ASSERT_OK(ret, "send_test_packet")) goto close; - assert_test_result(skel->maps.test_result); + ASSERT_TRUE(skel->bss->test_pass, "test_pass"); close: close_netns(nstoken); @@ -286,7 +261,7 @@ close: static void test_tuntap(struct bpf_program *xdp_prog, struct bpf_program *tc_prio_1_prog, struct bpf_program *tc_prio_2_prog, - struct bpf_map *result_map) + bool *test_pass) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); @@ -295,8 +270,7 @@ static void test_tuntap(struct bpf_program *xdp_prog, int tap_ifindex; int ret; - if (!clear_test_result(result_map)) - return; + *test_pass = false; ns = netns_new(TAP_NETNS, true); if (!ASSERT_OK_PTR(ns, "create and open ns")) @@ -340,7 +314,7 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "write_test_packet")) goto close; - assert_test_result(result_map); + ASSERT_TRUE(*test_pass, "test_pass"); close: if (tap_fd >= 0) @@ -431,37 +405,37 @@ void test_xdp_context_tuntap(void) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_read")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_read, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_slice")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_slice, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_write")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_write, skel->progs.ing_cls_dynptr_read, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_slice_rdwr")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_slice_rdwr, skel->progs.ing_cls_dynptr_slice, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_offset")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_offset_wr, skel->progs.ing_cls_dynptr_offset_rd, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_offset_oob")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_offset_oob, skel->progs.ing_cls, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("clone_data_meta_empty_on_data_write")) test_tuntap_mirred(skel->progs.ing_xdp, skel->progs.clone_data_meta_empty_on_data_write, diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index d79cb74b571e..11288b20f56c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -11,37 +11,36 @@ #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem -/* Demonstrates how metadata can be passed from an XDP program to a TC program - * using bpf_xdp_adjust_meta. - * For the sake of testing the metadata support in drivers, the XDP program uses - * a fixed-size payload after the Ethernet header as metadata. The TC program - * copies the metadata it receives into a map so it can be checked from - * userspace. +/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta. + * + * The XDP program extracts a fixed-size payload following the Ethernet header + * and stores it as packet metadata to test the driver's metadata support. The + * TC program then verifies if the passed metadata is correct. */ -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); - __type(key, __u32); - __uint(value_size, META_SIZE); -} test_result SEC(".maps"); - bool test_pass; +static const __u8 meta_want[META_SIZE] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, +}; + SEC("tc") int ing_cls(struct __sk_buff *ctx) { - __u8 *data, *data_meta; - __u32 key = 0; - - data_meta = ctx_ptr(ctx, data_meta); - data = ctx_ptr(ctx, data); + __u8 *meta_have = ctx_ptr(ctx, data_meta); + __u8 *data = ctx_ptr(ctx, data); - if (data_meta + META_SIZE > data) - return TC_ACT_SHOT; + if (meta_have + META_SIZE > data) + goto out; - bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY); + if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + goto out; + test_pass = true; +out: return TC_ACT_SHOT; } @@ -49,17 +48,17 @@ int ing_cls(struct __sk_buff *ctx) SEC("tc") int ing_cls_dynptr_read(struct __sk_buff *ctx) { + __u8 meta_have[META_SIZE]; struct bpf_dynptr meta; - const __u32 zero = 0; - __u8 *dst; - - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; bpf_dynptr_from_skb_meta(ctx, 0, &meta); - bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0); + bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + + if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + goto out; + test_pass = true; +out: return TC_ACT_SHOT; } @@ -86,20 +85,18 @@ SEC("tc") int ing_cls_dynptr_slice(struct __sk_buff *ctx) { struct bpf_dynptr meta; - const __u32 zero = 0; - __u8 *dst, *src; - - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; + __u8 *meta_have; bpf_dynptr_from_skb_meta(ctx, 0, &meta); - src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); - if (!src) - return TC_ACT_SHOT; + meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); + if (!meta_have) + goto out; - __builtin_memcpy(dst, src, META_SIZE); + if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + goto out; + test_pass = true; +out: return TC_ACT_SHOT; } @@ -129,14 +126,12 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) SEC("tc") int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) { - struct bpf_dynptr meta; const __u32 chunk_len = META_SIZE / 4; - const __u32 zero = 0; + __u8 meta_have[META_SIZE]; + struct bpf_dynptr meta; __u8 *dst, *src; - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; + dst = meta_have; /* 1. Regular read */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); @@ -155,9 +150,14 @@ int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) /* 4. Read from a slice starting at an offset */ src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len); if (!src) - return TC_ACT_SHOT; + goto out; __builtin_memcpy(dst, src, chunk_len); + if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + goto out; + + test_pass = true; +out: return TC_ACT_SHOT; } -- cgit v1.2.3 From 9ef9ac15a527739135548b87053f4646099e4bd6 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:48 +0100 Subject: selftests/bpf: Dump skb metadata on verification failure Add diagnostic output when metadata verification fails to help with troubleshooting test failures. Introduce a check_metadata() helper that prints both expected and received metadata to the BPF program's stderr stream on mismatch. The userspace test reads and dumps this stream on failure. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-11-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 24 ++++++++++++++++++--- tools/testing/selftests/bpf/progs/test_xdp_meta.c | 25 ++++++++++++++++++---- 2 files changed, 42 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 93a1fbe6a4fd..db3027564261 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -171,6 +171,21 @@ static int write_test_packet(int tap_fd) return 0; } +static void dump_err_stream(const struct bpf_program *prog) +{ + char buf[512]; + int ret; + + ret = 0; + do { + ret = bpf_prog_stream_read(bpf_program__fd(prog), + BPF_STREAM_STDERR, buf, sizeof(buf), + NULL); + if (ret > 0) + fwrite(buf, sizeof(buf[0]), ret, stderr); + } while (ret > 0); +} + void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -249,7 +264,8 @@ void test_xdp_context_veth(void) if (!ASSERT_OK(ret, "send_test_packet")) goto close; - ASSERT_TRUE(skel->bss->test_pass, "test_pass"); + if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass")) + dump_err_stream(tc_prog); close: close_netns(nstoken); @@ -314,7 +330,8 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "write_test_packet")) goto close; - ASSERT_TRUE(*test_pass, "test_pass"); + if (!ASSERT_TRUE(*test_pass, "test_pass")) + dump_err_stream(tc_prio_2_prog ? : tc_prio_1_prog); close: if (tap_fd >= 0) @@ -385,7 +402,8 @@ static void test_tuntap_mirred(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "write_test_packet")) goto close; - ASSERT_TRUE(*test_pass, "test_pass"); + if (!ASSERT_TRUE(*test_pass, "test_pass")) + dump_err_stream(tc_prog); close: if (tap_fd >= 0) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 11288b20f56c..3b137c4eed6c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -27,6 +27,23 @@ static const __u8 meta_want[META_SIZE] = { 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, }; +static bool check_metadata(const char *file, int line, __u8 *meta_have) +{ + if (!__builtin_memcmp(meta_have, meta_want, META_SIZE)) + return true; + + bpf_stream_printk(BPF_STREAM_STDERR, + "FAIL:%s:%d: metadata mismatch\n" + " have:\n %pI6\n %pI6\n" + " want:\n %pI6\n %pI6\n", + file, line, + &meta_have[0x00], &meta_have[0x10], + &meta_want[0x00], &meta_want[0x10]); + return false; +} + +#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have) + SEC("tc") int ing_cls(struct __sk_buff *ctx) { @@ -36,7 +53,7 @@ int ing_cls(struct __sk_buff *ctx) if (meta_have + META_SIZE > data) goto out; - if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + if (!check_metadata(meta_have)) goto out; test_pass = true; @@ -54,7 +71,7 @@ int ing_cls_dynptr_read(struct __sk_buff *ctx) bpf_dynptr_from_skb_meta(ctx, 0, &meta); bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); - if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + if (!check_metadata(meta_have)) goto out; test_pass = true; @@ -92,7 +109,7 @@ int ing_cls_dynptr_slice(struct __sk_buff *ctx) if (!meta_have) goto out; - if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + if (!check_metadata(meta_have)) goto out; test_pass = true; @@ -153,7 +170,7 @@ int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) goto out; __builtin_memcpy(dst, src, chunk_len); - if (__builtin_memcmp(meta_want, meta_have, META_SIZE)) + if (!check_metadata(meta_have)) goto out; test_pass = true; -- cgit v1.2.3 From 1e1357fde808a35c6069759298660134e5dab053 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:49 +0100 Subject: selftests/bpf: Expect unclone to preserve skb metadata Since pskb_expand_head() no longer clears metadata on unclone, update tests for cloned packets to expect metadata to remain intact. Also simplify the clone_dynptr_kept_on_{data,meta}_slice_write tests. Creating an r/w dynptr slice is sufficient to trigger an unclone in the prologue, so remove the extraneous writes to the data/meta slice. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-12-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 24 ++--- tools/testing/selftests/bpf/progs/test_xdp_meta.c | 118 ++++++++++++--------- 2 files changed, 79 insertions(+), 63 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index db3027564261..a129c3057202 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -454,29 +454,29 @@ void test_xdp_context_tuntap(void) skel->progs.ing_cls_dynptr_offset_oob, skel->progs.ing_cls, &skel->bss->test_pass); - if (test__start_subtest("clone_data_meta_empty_on_data_write")) + if (test__start_subtest("clone_data_meta_survives_data_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_data_meta_empty_on_data_write, + skel->progs.clone_data_meta_survives_data_write, &skel->bss->test_pass); - if (test__start_subtest("clone_data_meta_empty_on_meta_write")) + if (test__start_subtest("clone_data_meta_survives_meta_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_data_meta_empty_on_meta_write, + skel->progs.clone_data_meta_survives_meta_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_empty_on_data_slice_write")) + if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_empty_on_data_slice_write, + skel->progs.clone_meta_dynptr_survives_data_slice_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write")) + if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_empty_on_meta_slice_write, + skel->progs.clone_meta_dynptr_survives_meta_slice_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write")) + if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_rdonly_before_data_dynptr_write, + skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write")) + if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write, + skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write, &skel->bss->test_pass); test_xdp_meta__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 3b137c4eed6c..a70de55c6997 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -321,12 +321,13 @@ int ing_xdp(struct xdp_md *ctx) } /* - * Check that skb->data_meta..skb->data is empty if prog writes to packet - * _payload_ using packet pointers. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb->data_meta..skb->data is + * kept intact if prog writes to packet _payload_ using packet pointers. */ SEC("tc") -int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) +int clone_data_meta_survives_data_write(struct __sk_buff *ctx) { + __u8 *meta_have = ctx_ptr(ctx, data_meta); struct ethhdr *eth = ctx_ptr(ctx, data); if (eth + 1 > ctx_ptr(ctx, data_end)) @@ -335,8 +336,10 @@ int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - /* Expect no metadata */ - if (ctx->data_meta != ctx->data) + if (meta_have + META_SIZE > eth) + goto out; + + if (!check_metadata(meta_have)) goto out; /* Packet write to trigger unclone in prologue */ @@ -348,14 +351,14 @@ out: } /* - * Check that skb->data_meta..skb->data is empty if prog writes to packet - * _metadata_ using packet pointers. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb->data_meta..skb->data is + * kept intact if prog writes to packet _metadata_ using packet pointers. */ SEC("tc") -int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx) +int clone_data_meta_survives_meta_write(struct __sk_buff *ctx) { + __u8 *meta_have = ctx_ptr(ctx, data_meta); struct ethhdr *eth = ctx_ptr(ctx, data); - __u8 *md = ctx_ptr(ctx, data_meta); if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; @@ -363,25 +366,29 @@ int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - if (md + 1 > ctx_ptr(ctx, data)) { - /* Expect no metadata */ - test_pass = true; - } else { - /* Metadata write to trigger unclone in prologue */ - *md = 42; - } + if (meta_have + META_SIZE > eth) + goto out; + + if (!check_metadata(meta_have)) + goto out; + + /* Metadata write to trigger unclone in prologue */ + *meta_have = 42; + + test_pass = true; out: return TC_ACT_SHOT; } /* - * Check that skb_meta dynptr is writable but empty if prog writes to packet - * _payload_ using a dynptr slice. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, metadata remains intact if + * prog creates a r/w slice to packet _payload_. */ SEC("tc") -int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) +int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; struct ethhdr *eth; bpf_dynptr_from_skb(ctx, 0, &data); @@ -392,29 +399,26 @@ int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - /* Expect no metadata */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (!check_metadata(meta_have)) goto out; - /* Packet write to trigger unclone in prologue */ - eth->h_proto = 42; - test_pass = true; out: return TC_ACT_SHOT; } /* - * Check that skb_meta dynptr is writable but empty if prog writes to packet - * _metadata_ using a dynptr slice. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, metadata remains intact if + * prog creates an r/w slice to packet _metadata_. */ SEC("tc") -int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) +int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; const struct ethhdr *eth; - __u8 *md; + __u8 *meta_have; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); @@ -424,16 +428,13 @@ int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - /* Expect no metadata */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE); + if (!meta_have) goto out; - /* Metadata write to trigger unclone in prologue */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); - if (md) - *md = 42; + if (!check_metadata(meta_have)) + goto out; test_pass = true; out: @@ -441,14 +442,17 @@ out: } /* - * Check that skb_meta dynptr is read-only before prog writes to packet payload - * using dynptr_write helper. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb_meta dynptr is read-write + * before prog writes to packet _payload_ using dynptr_write helper and metadata + * remains intact before and after the write. */ SEC("tc") -int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) +int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; const struct ethhdr *eth; + int err; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); @@ -458,17 +462,20 @@ int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - /* Expect read-only metadata before unclone */ + /* Expect read-write metadata before unclone */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + if (bpf_dynptr_is_rdonly(&meta)) + goto out; + + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) goto out; /* Helper write to payload will unclone the packet */ bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0); - /* Expect no metadata after unclone */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0) + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) goto out; test_pass = true; @@ -477,14 +484,17 @@ out: } /* - * Check that skb_meta dynptr is read-only if prog writes to packet - * metadata using dynptr_write helper. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb_meta dynptr is read-write + * before prog writes to packet _metadata_ using dynptr_write helper and + * metadata remains intact before and after the write. */ SEC("tc") -int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx) +int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; const struct ethhdr *eth; + int err; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); @@ -494,14 +504,20 @@ int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx) if (eth->h_proto != 0) goto out; - /* Expect read-only metadata */ + /* Expect read-write metadata before unclone */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + if (bpf_dynptr_is_rdonly(&meta)) goto out; - /* Metadata write. Expect failure. */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL) + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) + goto out; + + /* Helper write to metadata will unclone the packet */ + bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0); + + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) goto out; test_pass = true; -- cgit v1.2.3 From 354d020c29f72513dce4f3902890158d99b67b67 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:50 +0100 Subject: selftests/bpf: Cover skb metadata access after vlan push/pop helper Add a test to verify that skb metadata remains accessible after calling bpf_skb_vlan_push() and bpf_skb_vlan_pop(), which modify the packet headroom. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-13-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 6 +++ tools/testing/selftests/bpf/progs/test_xdp_meta.c | 43 ++++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index a129c3057202..97c8f876f673 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -478,6 +478,12 @@ void test_xdp_context_tuntap(void) test_tuntap_mirred(skel->progs.ing_xdp, skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write, &skel->bss->test_pass); + /* Tests for BPF helpers which touch headroom */ + if (test__start_subtest("helper_skb_vlan_push_pop")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_vlan_push_pop, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index a70de55c6997..04c7487bb350 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -44,6 +44,16 @@ static bool check_metadata(const char *file, int line, __u8 *meta_have) #define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have) +static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb) +{ + __u8 *data_meta = ctx_ptr(skb, data_meta); + __u8 *data = ctx_ptr(skb, data); + + return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta); +} + +#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb) + SEC("tc") int ing_cls(struct __sk_buff *ctx) { @@ -525,4 +535,37 @@ out: return TC_ACT_SHOT; } +SEC("tc") +int helper_skb_vlan_push_pop(struct __sk_buff *ctx) +{ + int err; + + /* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only + * secondary tag push triggers an actual MAC header modification. + */ + err = bpf_skb_vlan_push(ctx, 0, 42); + if (err) + goto out; + err = bpf_skb_vlan_push(ctx, 0, 207); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + err = bpf_skb_vlan_pop(ctx); + if (err) + goto out; + err = bpf_skb_vlan_pop(ctx); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 29960e635b01b148e6db5a84957f99423fd85464 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:51 +0100 Subject: selftests/bpf: Cover skb metadata access after bpf_skb_adjust_room Add a test to verify that skb metadata remains accessible after calling bpf_skb_adjust_room(), which modifies the packet headroom and can trigger head reallocation. The helper expects an Ethernet frame carrying an IP packet so switch test packet identification by source MAC address since we can no longer rely on Ethernet proto being set to zero. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-14-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 25 ++++++--- tools/testing/selftests/bpf/progs/test_xdp_meta.c | 61 ++++++++++++++++++---- 2 files changed, 71 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 97c8f876f673..a3b82cf2f9e9 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -124,10 +124,10 @@ static int send_test_packet(int ifindex) int n, sock = -1; __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; - /* The ethernet header is not relevant for this test and doesn't need to - * be meaningful. - */ - struct ethhdr eth = { 0 }; + /* We use the Ethernet header only to identify the test packet */ + struct ethhdr eth = { + .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF }, + }; memcpy(packet, ð, sizeof(eth)); memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); @@ -160,8 +160,16 @@ static int write_test_packet(int tap_fd) __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; int n; - /* The ethernet header doesn't need to be valid for this test */ - memset(packet, 0, sizeof(struct ethhdr)); + /* The Ethernet header is mostly not relevant. We use it to identify the + * test packet and some BPF helpers we exercise expect to operate on + * Ethernet frames carrying IP packets. Pretend that's the case. + */ + struct ethhdr eth = { + .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF }, + .h_proto = htons(ETH_P_IP), + }; + + memcpy(packet, ð, sizeof(eth)); memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN); n = write(tap_fd, packet, sizeof(packet)); @@ -484,6 +492,11 @@ void test_xdp_context_tuntap(void) skel->progs.helper_skb_vlan_push_pop, NULL, /* tc prio 2 */ &skel->bss->test_pass); + if (test__start_subtest("helper_skb_adjust_room")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_adjust_room, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 04c7487bb350..6edc84d8dc52 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -20,6 +20,10 @@ bool test_pass; +static const __u8 smac_want[ETH_ALEN] = { + 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF, +}; + static const __u8 meta_want[META_SIZE] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, @@ -27,6 +31,11 @@ static const __u8 meta_want[META_SIZE] = { 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, }; +static bool check_smac(const struct ethhdr *eth) +{ + return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN); +} + static bool check_metadata(const char *file, int line, __u8 *meta_have) { if (!__builtin_memcmp(meta_have, meta_want, META_SIZE)) @@ -281,7 +290,7 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx) /* Drop any non-test packets */ if (eth + 1 > ctx_ptr(ctx, data_end)) return XDP_DROP; - if (eth->h_proto != 0) + if (!check_smac(eth)) return XDP_DROP; ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); @@ -321,9 +330,9 @@ int ing_xdp(struct xdp_md *ctx) /* The Linux networking stack may send other packets on the test * interface that interfere with the test. Just drop them. - * The test packets can be recognized by their ethertype of zero. + * The test packets can be recognized by their source MAC address. */ - if (eth->h_proto != 0) + if (!check_smac(eth)) return XDP_DROP; __builtin_memcpy(data_meta, payload, META_SIZE); @@ -343,7 +352,7 @@ int clone_data_meta_survives_data_write(struct __sk_buff *ctx) if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; if (meta_have + META_SIZE > eth) @@ -373,7 +382,7 @@ int clone_data_meta_survives_meta_write(struct __sk_buff *ctx) if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; if (meta_have + META_SIZE > eth) @@ -406,7 +415,7 @@ int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; bpf_dynptr_from_skb_meta(ctx, 0, &meta); @@ -435,7 +444,7 @@ int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; bpf_dynptr_from_skb_meta(ctx, 0, &meta); @@ -469,7 +478,7 @@ int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; /* Expect read-write metadata before unclone */ @@ -511,7 +520,7 @@ int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; /* Expect read-write metadata before unclone */ @@ -568,4 +577,38 @@ out: return TC_ACT_SHOT; } +SEC("tc") +int helper_skb_adjust_room(struct __sk_buff *ctx) +{ + int err; + + /* Grow a 1 byte hole after the MAC header */ + err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Shrink a 1 byte hole after the MAC header */ + err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Grow a 256 byte hole to trigger head reallocation */ + err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 85d454afef612f880c69cb39a7d74772a24411d3 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:52 +0100 Subject: selftests/bpf: Cover skb metadata access after change_head/tail helper Add a test to verify that skb metadata remains accessible after calling bpf_skb_change_head() and bpf_skb_change_tail(), which modify packet headroom/tailroom and can trigger head reallocation. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-15-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 5 ++++ tools/testing/selftests/bpf/progs/test_xdp_meta.c | 34 ++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index a3b82cf2f9e9..65735a134abb 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -497,6 +497,11 @@ void test_xdp_context_tuntap(void) skel->progs.helper_skb_adjust_room, NULL, /* tc prio 2 */ &skel->bss->test_pass); + if (test__start_subtest("helper_skb_change_head_tail")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_change_head_tail, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 6edc84d8dc52..e0b2e8ed0cc5 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -611,4 +611,38 @@ out: return TC_ACT_SHOT; } +SEC("tc") +int helper_skb_change_head_tail(struct __sk_buff *ctx) +{ + int err; + + /* Reserve 1 extra in the front for packet data */ + err = bpf_skb_change_head(ctx, 1, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Reserve 256 extra bytes in the front to trigger head reallocation */ + err = bpf_skb_change_head(ctx, 256, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Reserve 4k extra bytes in the back to trigger head reallocation */ + err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From d2c5cca3fb58f96732e1dfd19e36b43e4e54d214 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 5 Nov 2025 21:19:53 +0100 Subject: selftests/bpf: Cover skb metadata access after bpf_skb_change_proto Add a test to verify that skb metadata remains accessible after calling bpf_skb_change_proto(), which modifies packet headroom to accommodate different IP header sizes. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251105-skb-meta-rx-path-v4-16-5ceb08a9b37b@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 5 +++++ tools/testing/selftests/bpf/progs/test_xdp_meta.c | 25 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 65735a134abb..ee94c281888a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -502,6 +502,11 @@ void test_xdp_context_tuntap(void) skel->progs.helper_skb_change_head_tail, NULL, /* tc prio 2 */ &skel->bss->test_pass); + if (test__start_subtest("helper_skb_change_proto")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_change_proto, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index e0b2e8ed0cc5..0a0f371a2dec 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -4,6 +4,7 @@ #include #include +#include #include #include "bpf_kfuncs.h" @@ -645,4 +646,28 @@ out: return TC_ACT_SHOT; } +SEC("tc") +int helper_skb_change_proto(struct __sk_buff *ctx) +{ + int err; + + err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From beb3c67297d92f9428484410cf79135d38d0aff3 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 7 Nov 2025 11:56:32 +0800 Subject: bpf/selftests: Add selftest for bpf_smc_hs_ctrl This tests introduces a tiny smc_hs_ctrl for filtering SMC connections based on IP pairs, and also adds a realistic topology model to verify it. Also, we can only use SMC loopback under CI test, so an additional configuration needs to be enabled. Follow the steps below to run this test. make -C tools/testing/selftests/bpf cd tools/testing/selftests/bpf sudo ./test_progs -t smc Results shows: Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: D. Wythe Signed-off-by: Martin KaFai Lau Tested-by: Saket Kumar Bhaskar Reviewed-by: Zhu Yanjun Link: https://patch.msgid.link/20251107035632.115950-4-alibuda@linux.alibaba.com --- tools/testing/selftests/bpf/config | 5 + .../selftests/bpf/prog_tests/test_bpf_smc.c | 390 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_smc.c | 117 +++++++ 3 files changed, 512 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c create mode 100644 tools/testing/selftests/bpf/progs/bpf_smc.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 70b28c1e653e..fcd2f9bf78c9 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -123,3 +123,8 @@ CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y CONFIG_TCP_CONG_DCTCP=y CONFIG_TCP_CONG_BBR=y +CONFIG_INFINIBAND=y +CONFIG_SMC=y +CONFIG_SMC_HS_CTRL_BPF=y +CONFIG_DIBS=y +CONFIG_DIBS_LO=y \ No newline at end of file diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c new file mode 100644 index 000000000000..de22734abc4d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "network_helpers.h" +#include "bpf_smc.skel.h" + +#ifndef IPPROTO_SMC +#define IPPROTO_SMC 256 +#endif + +#define CLIENT_IP "127.0.0.1" +#define SERVER_IP "127.0.1.0" +#define SERVER_IP_VIA_RISK_PATH "127.0.2.0" + +#define SERVICE_1 80 +#define SERVICE_2 443 +#define SERVICE_3 8443 + +#define TEST_NS "bpf_smc_netns" + +static struct netns_obj *test_netns; + +struct smc_policy_ip_key { + __u32 sip; + __u32 dip; +}; + +struct smc_policy_ip_value { + __u8 mode; +}; + +#if defined(__s390x__) +/* s390x has default seid */ +static bool setup_ueid(void) { return true; } +static void cleanup_ueid(void) {} +#else +enum { + SMC_NETLINK_ADD_UEID = 10, + SMC_NETLINK_REMOVE_UEID +}; + +enum { + SMC_NLA_EID_TABLE_UNSPEC, + SMC_NLA_EID_TABLE_ENTRY, /* string */ +}; + +struct msgtemplate { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[1024]; +}; + +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) +#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN) + +#define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK" +#define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID" + +static uint16_t smc_nl_family_id = -1; + +static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type, + void *nla_data, int nla_len) +{ + struct nlattr *na; + struct sockaddr_nl nladdr; + int r, buflen; + char *buf; + + struct msgtemplate msg = {0}; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = nlmsg_flags; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid = nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 1; + na = (struct nlattr *)GENLMSG_DATA(&msg); + na->nla_type = nla_type; + na->nla_len = nla_len + 1 + NLA_HDRLEN; + memcpy(NLA_DATA(na), nla_data, nla_len); + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + + buf = (char *)&msg; + buflen = msg.n.nlmsg_len; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) { + return -1; + } + } + return 0; +} + +static bool get_smc_nl_family_id(void) +{ + struct sockaddr_nl nl_src; + struct msgtemplate msg; + struct nlattr *nl; + int fd, ret; + pid_t pid; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (!ASSERT_OK_FD(fd, "nl_family socket")) + return false; + + pid = getpid(); + + memset(&nl_src, 0, sizeof(nl_src)); + nl_src.nl_family = AF_NETLINK; + nl_src.nl_pid = pid; + + ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); + if (!ASSERT_OK(ret, "nl_family bind")) + goto fail; + + ret = send_cmd(fd, GENL_ID_CTRL, pid, + NLM_F_REQUEST, CTRL_CMD_GETFAMILY, + CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME, + strlen(SMC_GENL_FAMILY_NAME)); + if (!ASSERT_OK(ret, "nl_family query")) + goto fail; + + ret = recv(fd, &msg, sizeof(msg), 0); + if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 || + !NLMSG_OK(&msg.n, ret), "nl_family response")) + goto fail; + + nl = (struct nlattr *)GENLMSG_DATA(&msg); + nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len)); + if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type")) + goto fail; + + smc_nl_family_id = *(uint16_t *)NLA_DATA(nl); + close(fd); + return true; +fail: + close(fd); + return false; +} + +static bool smc_ueid(int op) +{ + struct sockaddr_nl nl_src; + struct msgtemplate msg; + struct nlmsgerr *err; + char test_ueid[32]; + int fd, ret; + pid_t pid; + + /* UEID required */ + memset(test_ueid, '\x20', sizeof(test_ueid)); + memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID)); + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (!ASSERT_OK_FD(fd, "ueid socket")) + return false; + + pid = getpid(); + memset(&nl_src, 0, sizeof(nl_src)); + nl_src.nl_family = AF_NETLINK; + nl_src.nl_pid = pid; + + ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); + if (!ASSERT_OK(ret, "ueid bind")) + goto fail; + + ret = send_cmd(fd, smc_nl_family_id, pid, + NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY, + (void *)test_ueid, sizeof(test_ueid)); + if (!ASSERT_OK(ret, "ueid cmd")) + goto fail; + + ret = recv(fd, &msg, sizeof(msg), 0); + if (!ASSERT_FALSE(ret < 0 || + !NLMSG_OK(&msg.n, ret), "ueid response")) + goto fail; + + if (msg.n.nlmsg_type == NLMSG_ERROR) { + err = NLMSG_DATA(&msg); + switch (op) { + case SMC_NETLINK_REMOVE_UEID: + if (!ASSERT_FALSE((err->error && err->error != -ENOENT), + "ueid remove")) + goto fail; + break; + case SMC_NETLINK_ADD_UEID: + if (!ASSERT_OK(err->error, "ueid add")) + goto fail; + break; + default: + break; + } + } + close(fd); + return true; +fail: + close(fd); + return false; +} + +static bool setup_ueid(void) +{ + /* get smc nl id */ + if (!get_smc_nl_family_id()) + return false; + /* clear old ueid for bpftest */ + smc_ueid(SMC_NETLINK_REMOVE_UEID); + /* smc-loopback required ueid */ + return smc_ueid(SMC_NETLINK_ADD_UEID); +} + +static void cleanup_ueid(void) +{ + smc_ueid(SMC_NETLINK_REMOVE_UEID); +} +#endif /* __s390x__ */ + +static bool setup_netns(void) +{ + test_netns = netns_new(TEST_NS, true); + if (!ASSERT_OK_PTR(test_netns, "open net namespace")) + goto fail_netns; + + SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo"); + SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo"); + + return true; +fail_ip: + netns_free(test_netns); +fail_netns: + return false; +} + +static void cleanup_netns(void) +{ + netns_free(test_netns); +} + +static bool setup_smc(void) +{ + if (!setup_ueid()) + return false; + + if (!setup_netns()) + goto fail_netns; + + return true; +fail_netns: + cleanup_ueid(); + return false; +} + +static int set_client_addr_cb(int fd, void *opts) +{ + const char *src = (const char *)opts; + struct sockaddr_in localaddr; + + localaddr.sin_family = AF_INET; + localaddr.sin_port = htons(0); + localaddr.sin_addr.s_addr = inet_addr(src); + return !ASSERT_OK(bind(fd, &localaddr, sizeof(localaddr)), "client bind"); +} + +static void run_link(const char *src, const char *dst, int port) +{ + struct network_helper_opts opts = {0}; + int server, client; + + server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL); + if (!ASSERT_OK_FD(server, "start service_1")) + return; + + opts.proto = IPPROTO_TCP; + opts.post_socket_cb = set_client_addr_cb; + opts.cb_opts = (void *)src; + + client = connect_to_fd_opts(server, &opts); + if (!ASSERT_OK_FD(client, "start connect")) + goto fail_client; + + close(client); +fail_client: + close(server); +} + +static void block_link(int map_fd, const char *src, const char *dst) +{ + struct smc_policy_ip_value val = { .mode = /* block */ 0 }; + struct smc_policy_ip_key key = { + .sip = inet_addr(src), + .dip = inet_addr(dst), + }; + + bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); +} + +/* + * This test describes a real-life service topology as follows: + * + * +-------------> service_1 + * link 1 | | + * +--------------------> server | link 2 + * | | V + * | +-------------> service_2 + * | link 3 + * client -------------------> server_via_unsafe_path -> service_3 + * + * Among them, + * 1. link-1 is very suitable for using SMC. + * 2. link-2 is not suitable for using SMC, because the mode of this link is + * kind of short-link services. + * 3. link-3 is also not suitable for using SMC, because the RDMA link is + * unavailable and needs to go through a long timeout before it can fallback + * to TCP. + * To achieve this goal, we use a customized SMC ip strategy via smc_hs_ctrl. + */ +static void test_topo(void) +{ + struct bpf_smc *skel; + int rc, map_fd; + + skel = bpf_smc__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load")) + return; + + rc = bpf_smc__attach(skel); + if (!ASSERT_OK(rc, "bpf_smc__attach")) + goto fail; + + map_fd = bpf_map__fd(skel->maps.smc_policy_ip); + if (!ASSERT_OK_FD(map_fd, "bpf_map__fd")) + goto fail; + + /* Mock the process of transparent replacement, since we will modify + * protocol to ipproto_smc accropding to it via + * fmod_ret/update_socket_protocol. + */ + write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck"); + + /* Configure ip strat */ + block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH); + block_link(map_fd, SERVER_IP, SERVER_IP); + + /* should go with smc */ + run_link(CLIENT_IP, SERVER_IP, SERVICE_1); + /* should go with smc fallback */ + run_link(SERVER_IP, SERVER_IP, SERVICE_2); + + ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count"); + + /* should go with smc */ + run_link(CLIENT_IP, SERVER_IP, SERVICE_2); + + ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count"); + + /* should go with smc fallback */ + run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3); + + ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count"); + +fail: + bpf_smc__destroy(skel); +} + +void test_bpf_smc(void) +{ + if (!setup_smc()) { + printf("setup for smc test failed, test SKIP:\n"); + test__skip(); + return; + } + + if (test__start_subtest("topo")) + test_topo(); + + cleanup_ueid(); + cleanup_netns(); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_smc.c b/tools/testing/selftests/bpf/progs/bpf_smc.c new file mode 100644 index 000000000000..70d8b08f5914 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_smc.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include +#include +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +enum { + BPF_SMC_LISTEN = 10, +}; + +struct smc_sock___local { + struct sock sk; + struct smc_sock *listen_smc; + bool use_fallback; +} __attribute__((preserve_access_index)); + +int smc_cnt = 0; +int fallback_cnt = 0; + +SEC("fentry/smc_release") +int BPF_PROG(bpf_smc_release, struct socket *sock) +{ + /* only count from one side (client) */ + if (sock->sk->__sk_common.skc_state == BPF_SMC_LISTEN) + return 0; + smc_cnt++; + return 0; +} + +SEC("fentry/smc_switch_to_fallback") +int BPF_PROG(bpf_smc_switch_to_fallback, struct smc_sock___local *smc) +{ + /* only count from one side (client) */ + if (smc && !smc->listen_smc) + fallback_cnt++; + return 0; +} + +/* go with default value if no strat was found */ +bool default_ip_strat_value = true; + +struct smc_policy_ip_key { + __u32 sip; + __u32 dip; +}; + +struct smc_policy_ip_value { + __u8 mode; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct smc_policy_ip_key)); + __uint(value_size, sizeof(struct smc_policy_ip_value)); + __uint(max_entries, 128); + __uint(map_flags, BPF_F_NO_PREALLOC); +} smc_policy_ip SEC(".maps"); + +static bool smc_check(__u32 src, __u32 dst) +{ + struct smc_policy_ip_value *value; + struct smc_policy_ip_key key = { + .sip = src, + .dip = dst, + }; + + value = bpf_map_lookup_elem(&smc_policy_ip, &key); + return value ? value->mode : default_ip_strat_value; +} + +SEC("fmod_ret/update_socket_protocol") +int BPF_PROG(smc_run, int family, int type, int protocol) +{ + struct task_struct *task; + + if (family != AF_INET && family != AF_INET6) + return protocol; + + if ((type & 0xf) != SOCK_STREAM) + return protocol; + + if (protocol != 0 && protocol != IPPROTO_TCP) + return protocol; + + task = bpf_get_current_task_btf(); + /* Prevent from affecting other tests */ + if (!task || !task->nsproxy->net_ns->smc.hs_ctrl) + return protocol; + + return IPPROTO_SMC; +} + +SEC("struct_ops") +int BPF_PROG(bpf_smc_set_tcp_option_cond, const struct tcp_sock *tp, + struct inet_request_sock *ireq) +{ + return smc_check(ireq->req.__req_common.skc_daddr, + ireq->req.__req_common.skc_rcv_saddr); +} + +SEC("struct_ops") +int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp) +{ + return smc_check(tp->inet_conn.icsk_inet.sk.__sk_common.skc_rcv_saddr, + tp->inet_conn.icsk_inet.sk.__sk_common.skc_daddr); +} + +SEC(".struct_ops") +struct smc_hs_ctrl linkcheck = { + .name = "linkcheck", + .syn_option = (void *)bpf_smc_set_tcp_option, + .synack_option = (void *)bpf_smc_set_tcp_option_cond, +}; -- cgit v1.2.3