From 9054b41c4e1b5725e573c13166cee56bf7034bbd Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 18 Dec 2022 17:09:54 +0400 Subject: net: Fix documentation for unregister_netdevice_notifier_net unregister_netdevice_notifier_net() is used for unregister a notifier registered by register_netdevice_notifier_net(). Also s/into/from/. Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b76fb37b381e..cf78f35bc0b9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1840,7 +1840,7 @@ EXPORT_SYMBOL(register_netdevice_notifier_net); * @nb: notifier * * Unregister a notifier previously registered by - * register_netdevice_notifier(). The notifier is unlinked into the + * register_netdevice_notifier_net(). The notifier is unlinked from the * kernel structures and may then be reused. A negative errno code * is returned on a failure. * -- cgit v1.2.3 From 9d03ebc71a027ca495c60f6e94d3cda81921791f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 19 Jan 2023 14:15:21 -0800 Subject: bpf: Rename bpf_{prog,map}_is_dev_bound to is_offloaded BPF offloading infra will be reused to implement bound-but-not-offloaded bpf programs. Rename existing helpers for clarity. No functional changes. Cc: John Fastabend Cc: David Ahern Cc: Martin KaFai Lau Cc: Willem de Bruijn Cc: Jesper Dangaard Brouer Cc: Anatoly Burakov Cc: Alexander Lobakin Cc: Magnus Karlsson Cc: Maryam Tahhan Cc: xdp-hints@xdp-project.net Cc: netdev@vger.kernel.org Reviewed-by: Jakub Kicinski Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230119221536.3349901-3-sdf@google.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 8 ++++---- kernel/bpf/core.c | 4 ++-- kernel/bpf/offload.c | 4 ++-- kernel/bpf/syscall.c | 22 +++++++++++----------- kernel/bpf/verifier.c | 18 +++++++++--------- net/core/dev.c | 4 ++-- net/core/filter.c | 2 +- 7 files changed, 31 insertions(+), 31 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ae7771c7d750..1bb525c0130e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2481,12 +2481,12 @@ void unpriv_ebpf_notify(int new_state); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); -static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) +static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux) { return aux->offload_requested; } -static inline bool bpf_map_is_dev_bound(struct bpf_map *map) +static inline bool bpf_map_is_offloaded(struct bpf_map *map) { return unlikely(map->ops == &bpf_map_offload_ops); } @@ -2513,12 +2513,12 @@ static inline int bpf_prog_offload_init(struct bpf_prog *prog, return -EOPNOTSUPP; } -static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) +static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux) { return false; } -static inline bool bpf_map_is_dev_bound(struct bpf_map *map) +static inline bool bpf_map_is_offloaded(struct bpf_map *map) { return false; } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ba3fff17e2f9..515f4f08591c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2182,7 +2182,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * valid program, which in this case would simply not * be JITed, but falls back to the interpreter. */ - if (!bpf_prog_is_dev_bound(fp->aux)) { + if (!bpf_prog_is_offloaded(fp->aux)) { *err = bpf_prog_alloc_jited_linfo(fp); if (*err) return fp; @@ -2553,7 +2553,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) #endif bpf_free_used_maps(aux); bpf_free_used_btfs(aux); - if (bpf_prog_is_dev_bound(aux)) + if (bpf_prog_is_offloaded(aux)) bpf_prog_offload_destroy(aux->prog); #ifdef CONFIG_PERF_EVENTS if (aux->prog->has_callchain_buf) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 13e4efc971e6..f5769a8ecbee 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -549,7 +549,7 @@ static bool __bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_offload_netdev *ondev1, *ondev2; struct bpf_prog_offload *offload; - if (!bpf_prog_is_dev_bound(prog->aux)) + if (!bpf_prog_is_offloaded(prog->aux)) return false; offload = prog->aux->offload; @@ -581,7 +581,7 @@ bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map) struct bpf_offloaded_map *offmap; bool ret; - if (!bpf_map_is_dev_bound(map)) + if (!bpf_map_is_offloaded(map)) return bpf_map_offload_neutral(map); offmap = map_to_offmap(map); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 35ffd808f281..5e90b697f908 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -181,7 +181,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, int err; /* Need to create a kthread, thus must support schedule */ - if (bpf_map_is_dev_bound(map)) { + if (bpf_map_is_offloaded(map)) { return bpf_map_offload_update_elem(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { @@ -238,7 +238,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, void *ptr; int err; - if (bpf_map_is_dev_bound(map)) + if (bpf_map_is_offloaded(map)) return bpf_map_offload_lookup_elem(map, key, value); bpf_disable_instrumentation(); @@ -1483,7 +1483,7 @@ static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) goto err_put; } - if (bpf_map_is_dev_bound(map)) { + if (bpf_map_is_offloaded(map)) { err = bpf_map_offload_delete_elem(map, key); goto out; } else if (IS_FD_PROG_ARRAY(map) || @@ -1547,7 +1547,7 @@ static int map_get_next_key(union bpf_attr *attr) if (!next_key) goto free_key; - if (bpf_map_is_dev_bound(map)) { + if (bpf_map_is_offloaded(map)) { err = bpf_map_offload_get_next_key(map, key, next_key); goto out; } @@ -1605,7 +1605,7 @@ int generic_map_delete_batch(struct bpf_map *map, map->key_size)) break; - if (bpf_map_is_dev_bound(map)) { + if (bpf_map_is_offloaded(map)) { err = bpf_map_offload_delete_elem(map, key); break; } @@ -1851,7 +1851,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { - if (!bpf_map_is_dev_bound(map)) { + if (!bpf_map_is_offloaded(map)) { bpf_disable_instrumentation(); rcu_read_lock(); err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags); @@ -1944,7 +1944,7 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) if (!ops) return -EINVAL; - if (!bpf_prog_is_dev_bound(prog->aux)) + if (!bpf_prog_is_offloaded(prog->aux)) prog->aux->ops = ops; else prog->aux->ops = &bpf_offload_prog_ops; @@ -2255,7 +2255,7 @@ bool bpf_prog_get_ok(struct bpf_prog *prog, if (prog->type != *attach_type) return false; - if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv) + if (bpf_prog_is_offloaded(prog->aux) && !attach_drv) return false; return true; @@ -2598,7 +2598,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) atomic64_set(&prog->aux->refcnt, 1); prog->gpl_compatible = is_gpl ? 1 : 0; - if (bpf_prog_is_dev_bound(prog->aux)) { + if (bpf_prog_is_offloaded(prog->aux)) { err = bpf_prog_offload_init(prog, attr); if (err) goto free_prog_sec; @@ -3997,7 +3997,7 @@ static int bpf_prog_get_info_by_fd(struct file *file, return -EFAULT; } - if (bpf_prog_is_dev_bound(prog->aux)) { + if (bpf_prog_is_offloaded(prog->aux)) { err = bpf_prog_offload_info_fill(&info, prog); if (err) return err; @@ -4225,7 +4225,7 @@ static int bpf_map_get_info_by_fd(struct file *file, } info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; - if (bpf_map_is_dev_bound(map)) { + if (bpf_map_is_offloaded(map)) { err = bpf_map_offload_info_fill(&info, map); if (err) return err; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ecf7fed7881c..bba68eefb4b2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14099,7 +14099,7 @@ static int do_check(struct bpf_verifier_env *env) env->prev_log_len = env->log.len_used; } - if (bpf_prog_is_dev_bound(env->prog->aux)) { + if (bpf_prog_is_offloaded(env->prog->aux)) { err = bpf_prog_offload_verify_insn(env, env->insn_idx, env->prev_insn_idx); if (err) @@ -14579,7 +14579,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } } - if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && + if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) && !bpf_offload_prog_map_match(prog, map)) { verbose(env, "offload device mismatch between prog and map\n"); return -EINVAL; @@ -15060,7 +15060,7 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) unsigned int orig_prog_len = env->prog->len; int err; - if (bpf_prog_is_dev_bound(env->prog->aux)) + if (bpf_prog_is_offloaded(env->prog->aux)) bpf_prog_offload_remove_insns(env, off, cnt); err = bpf_remove_insns(env->prog, off, cnt); @@ -15141,7 +15141,7 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) else continue; - if (bpf_prog_is_dev_bound(env->prog->aux)) + if (bpf_prog_is_offloaded(env->prog->aux)) bpf_prog_offload_replace_insn(env, i, &ja); memcpy(insn, &ja, sizeof(ja)); @@ -15328,7 +15328,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } } - if (bpf_prog_is_dev_bound(env->prog->aux)) + if (bpf_prog_is_offloaded(env->prog->aux)) return 0; insn = env->prog->insnsi + delta; @@ -15728,7 +15728,7 @@ static int fixup_call_args(struct bpf_verifier_env *env) int err = 0; if (env->prog->jit_requested && - !bpf_prog_is_dev_bound(env->prog->aux)) { + !bpf_prog_is_offloaded(env->prog->aux)) { err = jit_subprogs(env); if (err == 0) return 0; @@ -17231,7 +17231,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) if (ret < 0) goto skip_full_check; - if (bpf_prog_is_dev_bound(env->prog->aux)) { + if (bpf_prog_is_offloaded(env->prog->aux)) { ret = bpf_prog_offload_verifier_prep(env->prog); if (ret) goto skip_full_check; @@ -17244,7 +17244,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) ret = do_check_subprogs(env); ret = ret ?: do_check_main(env); - if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux)) + if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux)) ret = bpf_prog_offload_finalize(env); skip_full_check: @@ -17279,7 +17279,7 @@ skip_full_check: /* do 32-bit optimization after insn patching has done so those patched * insns could be handled correctly. */ - if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) { + if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) { ret = opt_subreg_zext_lo32_rnd_hi32(env, attr); env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret : false; diff --git a/net/core/dev.c b/net/core/dev.c index cf78f35bc0b9..a37829de6529 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9224,8 +9224,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); return -EEXIST; } - if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) { - NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported"); + if (!offload && bpf_prog_is_offloaded(new_prog->aux)) { + NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported"); return -EINVAL; } if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { diff --git a/net/core/filter.c b/net/core/filter.c index b4547a2c02f4..ed08dbf10338 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8760,7 +8760,7 @@ static bool xdp_is_valid_access(int off, int size, } if (type == BPF_WRITE) { - if (bpf_prog_is_dev_bound(prog->aux)) { + if (bpf_prog_is_offloaded(prog->aux)) { switch (off) { case offsetof(struct xdp_md, rx_queue_index): return __is_valid_xdp_access(off, size); -- cgit v1.2.3 From 2b3486bc2d237ec345b3942b7be5deabf8c8fed1 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 19 Jan 2023 14:15:24 -0800 Subject: bpf: Introduce device-bound XDP programs New flag BPF_F_XDP_DEV_BOUND_ONLY plus all the infra to have a way to associate a netdev with a BPF program at load time. netdevsim checks are dropped in favor of generic check in dev_xdp_attach. Cc: John Fastabend Cc: David Ahern Cc: Martin KaFai Lau Cc: Jakub Kicinski Cc: Willem de Bruijn Cc: Jesper Dangaard Brouer Cc: Anatoly Burakov Cc: Alexander Lobakin Cc: Magnus Karlsson Cc: Maryam Tahhan Cc: xdp-hints@xdp-project.net Cc: netdev@vger.kernel.org Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230119221536.3349901-6-sdf@google.com Signed-off-by: Martin KaFai Lau --- drivers/net/netdevsim/bpf.c | 4 -- include/linux/bpf.h | 24 +++++++++-- include/uapi/linux/bpf.h | 5 +++ kernel/bpf/core.c | 4 +- kernel/bpf/offload.c | 95 +++++++++++++++++++++++++++++++----------- kernel/bpf/syscall.c | 9 ++-- net/core/dev.c | 5 +++ tools/include/uapi/linux/bpf.h | 5 +++ 8 files changed, 113 insertions(+), 38 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 50854265864d..f60eb97e3a62 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -315,10 +315,6 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf) NSIM_EA(bpf->extack, "xdpoffload of non-bound program"); return -EINVAL; } - if (!bpf_offload_dev_match(bpf->prog, ns->netdev)) { - NSIM_EA(bpf->extack, "program bound to different dev"); - return -EINVAL; - } state = bpf->prog->aux->offload->dev_priv; if (WARN_ON(strcmp(state->state, "xlated"))) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1bb525c0130e..b97a05bb47be 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1261,7 +1261,8 @@ struct bpf_prog_aux { enum bpf_prog_type saved_dst_prog_type; enum bpf_attach_type saved_dst_attach_type; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ - bool offload_requested; + bool dev_bound; /* Program is bound to the netdev. */ + bool offload_requested; /* Program is bound and offloaded to the netdev. */ bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool func_proto_unreliable; bool sleepable; @@ -2451,7 +2452,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); int bpf_prog_offload_compile(struct bpf_prog *prog); -void bpf_prog_offload_destroy(struct bpf_prog *prog); +void bpf_prog_dev_bound_destroy(struct bpf_prog *prog); int bpf_prog_offload_info_fill(struct bpf_prog_info *info, struct bpf_prog *prog); @@ -2479,7 +2480,13 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev); void unpriv_ebpf_notify(int new_state); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) -int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); +int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr); +void bpf_dev_bound_netdev_unregister(struct net_device *dev); + +static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) +{ + return aux->dev_bound; +} static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux) { @@ -2507,12 +2514,21 @@ void sock_map_unhash(struct sock *sk); void sock_map_destroy(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else -static inline int bpf_prog_offload_init(struct bpf_prog *prog, +static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr) { return -EOPNOTSUPP; } +static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev) +{ +} + +static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) +{ + return false; +} + static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux) { return false; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index adae5b168f9d..ba0f0cfb5e42 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1156,6 +1156,11 @@ enum bpf_link_type { */ #define BPF_F_XDP_HAS_FRAGS (1U << 5) +/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded + * program becomes device-bound but can access XDP metadata. + */ +#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 515f4f08591c..1cf19da3c128 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2553,8 +2553,8 @@ static void bpf_prog_free_deferred(struct work_struct *work) #endif bpf_free_used_maps(aux); bpf_free_used_btfs(aux); - if (bpf_prog_is_offloaded(aux)) - bpf_prog_offload_destroy(aux->prog); + if (bpf_prog_is_dev_bound(aux)) + bpf_prog_dev_bound_destroy(aux->prog); #ifdef CONFIG_PERF_EVENTS if (aux->prog->has_callchain_buf) put_callchain_buffers(); diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index deb06498da0b..f767455ed732 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -41,7 +41,7 @@ struct bpf_offload_dev { struct bpf_offload_netdev { struct rhash_head l; struct net_device *netdev; - struct bpf_offload_dev *offdev; + struct bpf_offload_dev *offdev; /* NULL when bound-only */ struct list_head progs; struct list_head maps; struct list_head offdev_netdevs; @@ -89,19 +89,17 @@ static int __bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, INIT_LIST_HEAD(&ondev->progs); INIT_LIST_HEAD(&ondev->maps); - down_write(&bpf_devs_lock); err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params); if (err) { netdev_warn(netdev, "failed to register for BPF offload\n"); - goto err_unlock_free; + goto err_free; } - list_add(&ondev->offdev_netdevs, &offdev->netdevs); - up_write(&bpf_devs_lock); + if (offdev) + list_add(&ondev->offdev_netdevs, &offdev->netdevs); return 0; -err_unlock_free: - up_write(&bpf_devs_lock); +err_free: kfree(ondev); return err; } @@ -149,24 +147,26 @@ static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap) static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, struct net_device *netdev) { - struct bpf_offload_netdev *ondev, *altdev; + struct bpf_offload_netdev *ondev, *altdev = NULL; struct bpf_offloaded_map *offmap, *mtmp; struct bpf_prog_offload *offload, *ptmp; ASSERT_RTNL(); - down_write(&bpf_devs_lock); ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params); if (WARN_ON(!ondev)) - goto unlock; + return; WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params)); - list_del(&ondev->offdev_netdevs); /* Try to move the objects to another netdev of the device */ - altdev = list_first_entry_or_null(&offdev->netdevs, - struct bpf_offload_netdev, - offdev_netdevs); + if (offdev) { + list_del(&ondev->offdev_netdevs); + altdev = list_first_entry_or_null(&offdev->netdevs, + struct bpf_offload_netdev, + offdev_netdevs); + } + if (altdev) { list_for_each_entry(offload, &ondev->progs, offloads) offload->netdev = altdev->netdev; @@ -185,11 +185,9 @@ static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, WARN_ON(!list_empty(&ondev->progs)); WARN_ON(!list_empty(&ondev->maps)); kfree(ondev); -unlock: - up_write(&bpf_devs_lock); } -int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) +int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr) { struct bpf_offload_netdev *ondev; struct bpf_prog_offload *offload; @@ -199,7 +197,11 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) attr->prog_type != BPF_PROG_TYPE_XDP) return -EINVAL; - if (attr->prog_flags) + if (attr->prog_flags & ~BPF_F_XDP_DEV_BOUND_ONLY) + return -EINVAL; + + if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS && + attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY) return -EINVAL; offload = kzalloc(sizeof(*offload), GFP_USER); @@ -214,11 +216,23 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) if (err) goto err_maybe_put; + prog->aux->offload_requested = !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY); + down_write(&bpf_devs_lock); ondev = bpf_offload_find_netdev(offload->netdev); if (!ondev) { - err = -EINVAL; - goto err_unlock; + if (bpf_prog_is_offloaded(prog->aux)) { + err = -EINVAL; + goto err_unlock; + } + + /* When only binding to the device, explicitly + * create an entry in the hashtable. + */ + err = __bpf_offload_dev_netdev_register(NULL, offload->netdev); + if (err) + goto err_unlock; + ondev = bpf_offload_find_netdev(offload->netdev); } offload->offdev = ondev->offdev; prog->aux->offload = offload; @@ -321,12 +335,25 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) up_read(&bpf_devs_lock); } -void bpf_prog_offload_destroy(struct bpf_prog *prog) +void bpf_prog_dev_bound_destroy(struct bpf_prog *prog) { + struct bpf_offload_netdev *ondev; + struct net_device *netdev; + + rtnl_lock(); down_write(&bpf_devs_lock); - if (prog->aux->offload) + if (prog->aux->offload) { + list_del_init(&prog->aux->offload->offloads); + + netdev = prog->aux->offload->netdev; __bpf_prog_offload_destroy(prog); + + ondev = bpf_offload_find_netdev(netdev); + if (!ondev->offdev && list_empty(&ondev->progs)) + __bpf_offload_dev_netdev_unregister(NULL, netdev); + } up_write(&bpf_devs_lock); + rtnl_unlock(); } static int bpf_prog_offload_translate(struct bpf_prog *prog) @@ -621,7 +648,7 @@ static bool __bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_offload_netdev *ondev1, *ondev2; struct bpf_prog_offload *offload; - if (!bpf_prog_is_offloaded(prog->aux)) + if (!bpf_prog_is_dev_bound(prog->aux)) return false; offload = prog->aux->offload; @@ -667,14 +694,21 @@ bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map) int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, struct net_device *netdev) { - return __bpf_offload_dev_netdev_register(offdev, netdev); + int err; + + down_write(&bpf_devs_lock); + err = __bpf_offload_dev_netdev_register(offdev, netdev); + up_write(&bpf_devs_lock); + return err; } EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register); void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, struct net_device *netdev) { + down_write(&bpf_devs_lock); __bpf_offload_dev_netdev_unregister(offdev, netdev); + up_write(&bpf_devs_lock); } EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister); @@ -708,6 +742,19 @@ void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev) } EXPORT_SYMBOL_GPL(bpf_offload_dev_priv); +void bpf_dev_bound_netdev_unregister(struct net_device *dev) +{ + struct bpf_offload_netdev *ondev; + + ASSERT_RTNL(); + + down_write(&bpf_devs_lock); + ondev = bpf_offload_find_netdev(dev); + if (ondev && !ondev->offdev) + __bpf_offload_dev_netdev_unregister(NULL, ondev->netdev); + up_write(&bpf_devs_lock); +} + static int __init bpf_offload_init(void) { return rhashtable_init(&offdevs, &offdevs_params); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5e90b697f908..fdf4ff3d5a7f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2491,7 +2491,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) BPF_F_TEST_STATE_FREQ | BPF_F_SLEEPABLE | BPF_F_TEST_RND_HI32 | - BPF_F_XDP_HAS_FRAGS)) + BPF_F_XDP_HAS_FRAGS | + BPF_F_XDP_DEV_BOUND_ONLY)) return -EINVAL; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && @@ -2575,7 +2576,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) prog->aux->attach_btf = attach_btf; prog->aux->attach_btf_id = attr->attach_btf_id; prog->aux->dst_prog = dst_prog; - prog->aux->offload_requested = !!attr->prog_ifindex; + prog->aux->dev_bound = !!attr->prog_ifindex; prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; @@ -2598,8 +2599,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) atomic64_set(&prog->aux->refcnt, 1); prog->gpl_compatible = is_gpl ? 1 : 0; - if (bpf_prog_is_offloaded(prog->aux)) { - err = bpf_prog_offload_init(prog, attr); + if (bpf_prog_is_dev_bound(prog->aux)) { + err = bpf_prog_dev_bound_init(prog, attr); if (err) goto free_prog_sec; } diff --git a/net/core/dev.c b/net/core/dev.c index a37829de6529..e66da626df84 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9228,6 +9228,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported"); return -EINVAL; } + if (bpf_prog_is_dev_bound(new_prog->aux) && !bpf_offload_dev_match(new_prog, dev)) { + NL_SET_ERR_MSG(extack, "Program bound to different device"); + return -EINVAL; + } if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); return -EINVAL; @@ -10830,6 +10834,7 @@ void unregister_netdevice_many_notify(struct list_head *head, dev_shutdown(dev); dev_xdp_uninstall(dev); + bpf_dev_bound_netdev_unregister(dev); netdev_offload_xstats_disable_all(dev); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 142b81bcbb2e..7f024ac22edd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1156,6 +1156,11 @@ enum bpf_link_type { */ #define BPF_F_XDP_HAS_FRAGS (1U << 5) +/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded + * program becomes device-bound but can access XDP metadata. + */ +#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ -- cgit v1.2.3 From 3176eb82681ec9c8af31c6588ddedcc6cfb9e445 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Jan 2023 13:07:43 +0100 Subject: net: avoid irqsave in skb_defer_free_flush The spin_lock irqsave/restore API variant in skb_defer_free_flush can be replaced with the faster spin_lock irq variant, which doesn't need to read and restore the CPU flags. Using the unconditional irq "disable/enable" API variant is safe, because the skb_defer_free_flush() function is only called during NAPI-RX processing in net_rx_action(), where it is known the IRQs are enabled. Expected gain is 14 cycles from avoiding reading and restoring CPU flags in a spin_lock_irqsave/restore operation, measured via a microbencmark kernel module[1] on CPU E5-1650 v4 @ 3.60GHz. Microbenchmark overhead of spin_lock+unlock: - spin_lock_unlock_irq cost: 34 cycles(tsc) 9.486 ns - spin_lock_unlock_irqsave cost: 48 cycles(tsc) 13.567 ns We don't expect to see a measurable packet performance gain, as skb_defer_free_flush() is called infrequently once per NIC device NAPI bulk cycle and conditionally only if SKBs have been deferred by other CPUs via skb_attempt_defer_free(). [1] https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/lib/time_bench_sample.c Reviewed-by: Jacob Keller Signed-off-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/167421646327.1321776.7390743166998776914.stgit@firesoul Signed-off-by: Jakub Kicinski --- net/core/dev.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index cf78f35bc0b9..9c60190fe352 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6616,17 +6616,16 @@ static int napi_threaded_poll(void *data) static void skb_defer_free_flush(struct softnet_data *sd) { struct sk_buff *skb, *next; - unsigned long flags; /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */ if (!READ_ONCE(sd->defer_list)) return; - spin_lock_irqsave(&sd->defer_lock, flags); + spin_lock_irq(&sd->defer_lock); skb = sd->defer_list; sd->defer_list = NULL; sd->defer_count = 0; - spin_unlock_irqrestore(&sd->defer_lock, flags); + spin_unlock_irq(&sd->defer_lock); while (skb != NULL) { next = skb->next; -- cgit v1.2.3 From 9eefedd58ae1daece2ba907849a44db2941fb4b0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Jan 2023 10:58:38 -0500 Subject: net: add gso_ipv4_max_size and gro_ipv4_max_size per device This patch introduces gso_ipv4_max_size and gro_ipv4_max_size per device and adds netlink attributes for them, so that IPV4 BIG TCP can be guarded by a separate tunable in the next patch. To not break the old application using "gso/gro_max_size" for IPv4 GSO packets, this patch updates "gso/gro_ipv4_max_size" in netif_set_gso/gro_max_size() if the new size isn't greater than GSO_LEGACY_MAX_SIZE, so that nothing will change even if userspace doesn't realize the new netlink attributes. Signed-off-by: Xin Long Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 6 ++++++ include/uapi/linux/if_link.h | 3 +++ net/core/dev.c | 4 ++++ net/core/dev.h | 18 ++++++++++++++++++ net/core/rtnetlink.c | 33 +++++++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2466afa25078..d5ef4c1fedd2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1964,6 +1964,8 @@ enum netdev_ml_priv_type { * @gso_max_segs: Maximum number of segments that can be passed to the * NIC for GSO * @tso_max_segs: Device (as in HW) limit on the max TSO segment count + * @gso_ipv4_max_size: Maximum size of generic segmentation offload, + * for IPv4. * * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device @@ -2004,6 +2006,8 @@ enum netdev_ml_priv_type { * keep a list of interfaces to be deleted. * @gro_max_size: Maximum size of aggregated packet in generic * receive offload (GRO) + * @gro_ipv4_max_size: Maximum size of aggregated packet in generic + * receive offload (GRO), for IPv4. * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. @@ -2207,6 +2211,7 @@ struct net_device { */ #define GRO_MAX_SIZE (8 * 65535u) unsigned int gro_max_size; + unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; @@ -2330,6 +2335,7 @@ struct net_device { u16 gso_max_segs; #define TSO_MAX_SEGS U16_MAX u16 tso_max_segs; + unsigned int gso_ipv4_max_size; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1021a7e47a86..02b87e4c65be 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -374,6 +374,9 @@ enum { IFLA_DEVLINK_PORT, + IFLA_GSO_IPV4_MAX_SIZE, + IFLA_GRO_IPV4_MAX_SIZE, + __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index f72f5c4ee7e2..bb42150a38ec 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3001,6 +3001,8 @@ void netif_set_tso_max_size(struct net_device *dev, unsigned int size) dev->tso_max_size = min(GSO_MAX_SIZE, size); if (size < READ_ONCE(dev->gso_max_size)) netif_set_gso_max_size(dev, size); + if (size < READ_ONCE(dev->gso_ipv4_max_size)) + netif_set_gso_ipv4_max_size(dev, size); } EXPORT_SYMBOL(netif_set_tso_max_size); @@ -10614,6 +10616,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_LEGACY_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; dev->gro_max_size = GRO_LEGACY_MAX_SIZE; + dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE; + dev->gro_ipv4_max_size = GRO_LEGACY_MAX_SIZE; dev->tso_max_size = TSO_LEGACY_MAX_SIZE; dev->tso_max_segs = TSO_MAX_SEGS; dev->upper_level = 1; diff --git a/net/core/dev.h b/net/core/dev.h index 814ed5b7b960..a065b7571441 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -100,6 +100,8 @@ static inline void netif_set_gso_max_size(struct net_device *dev, { /* dev->gso_max_size is read locklessly from sk_setup_caps() */ WRITE_ONCE(dev->gso_max_size, size); + if (size <= GSO_LEGACY_MAX_SIZE) + WRITE_ONCE(dev->gso_ipv4_max_size, size); } static inline void netif_set_gso_max_segs(struct net_device *dev, @@ -114,6 +116,22 @@ static inline void netif_set_gro_max_size(struct net_device *dev, { /* This pairs with the READ_ONCE() in skb_gro_receive() */ WRITE_ONCE(dev->gro_max_size, size); + if (size <= GRO_LEGACY_MAX_SIZE) + WRITE_ONCE(dev->gro_ipv4_max_size, size); +} + +static inline void netif_set_gso_ipv4_max_size(struct net_device *dev, + unsigned int size) +{ + /* dev->gso_ipv4_max_size is read locklessly from sk_setup_caps() */ + WRITE_ONCE(dev->gso_ipv4_max_size, size); +} + +static inline void netif_set_gro_ipv4_max_size(struct net_device *dev, + unsigned int size) +{ + /* This pairs with the READ_ONCE() in skb_gro_receive() */ + WRITE_ONCE(dev->gro_ipv4_max_size, size); } #endif diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 64289bc98887..b9f584955b77 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1074,6 +1074,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */ + + nla_total_size(4) /* IFLA_GSO_IPV4_MAX_SIZE */ + + nla_total_size(4) /* IFLA_GRO_IPV4_MAX_SIZE */ + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */ + nla_total_size(1) /* IFLA_OPERSTATE */ @@ -1807,6 +1809,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || + nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) || + nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) || nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) || nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) || #ifdef CONFIG_RPS @@ -1968,6 +1972,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT }, [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT }, [IFLA_ALLMULTI] = { .type = NLA_REJECT }, + [IFLA_GSO_IPV4_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_GRO_IPV4_MAX_SIZE] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2883,6 +2889,29 @@ static int do_setlink(const struct sk_buff *skb, } } + if (tb[IFLA_GSO_IPV4_MAX_SIZE]) { + u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]); + + if (max_size > dev->tso_max_size) { + err = -EINVAL; + goto errout; + } + + if (dev->gso_ipv4_max_size ^ max_size) { + netif_set_gso_ipv4_max_size(dev, max_size); + status |= DO_SETLINK_MODIFIED; + } + } + + if (tb[IFLA_GRO_IPV4_MAX_SIZE]) { + u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]); + + if (dev->gro_ipv4_max_size ^ gro_max_size) { + netif_set_gro_ipv4_max_size(dev, gro_max_size); + status |= DO_SETLINK_MODIFIED; + } + } + if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); @@ -3325,6 +3354,10 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS])); if (tb[IFLA_GRO_MAX_SIZE]) netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE])); + if (tb[IFLA_GSO_IPV4_MAX_SIZE]) + netif_set_gso_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE])); + if (tb[IFLA_GRO_IPV4_MAX_SIZE]) + netif_set_gro_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE])); return dev; } -- cgit v1.2.3 From d3d854fd6a1d97157f790604e07f6386e8df8fe4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 1 Feb 2023 11:24:17 +0100 Subject: netdev-genl: create a simple family for netdev stuff Add a Netlink spec-compatible family for netdevs. This is a very simple implementation without much thought going into it. It allows us to reap all the benefits of Netlink specs, one can use the generic client to issue the commands: $ ./cli.py --spec netdev.yaml --dump dev_get [{'ifindex': 1, 'xdp-features': set()}, {'ifindex': 2, 'xdp-features': {'basic', 'ndo-xmit', 'redirect'}}, {'ifindex': 3, 'xdp-features': {'rx-sg'}}] the generic python library does not have flags-by-name support, yet, but we also don't have to carry strings in the messages, as user space can get the names from the spec. Acked-by: Jesper Dangaard Brouer Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Co-developed-by: Kumar Kartikeya Dwivedi Signed-off-by: Kumar Kartikeya Dwivedi Co-developed-by: Marek Majtyka Signed-off-by: Marek Majtyka Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/327ad9c9868becbe1e601b580c962549c8cd81f2.1675245258.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- Documentation/netlink/specs/netdev.yaml | 100 ++++++++++++++++++ include/linux/netdevice.h | 3 + include/net/xdp.h | 3 + include/uapi/linux/netdev.h | 59 +++++++++++ net/core/Makefile | 3 +- net/core/dev.c | 1 + net/core/netdev-genl-gen.c | 48 +++++++++ net/core/netdev-genl-gen.h | 23 ++++ net/core/netdev-genl.c | 179 ++++++++++++++++++++++++++++++++ tools/include/uapi/linux/netdev.h | 59 +++++++++++ 10 files changed, 477 insertions(+), 1 deletion(-) create mode 100644 Documentation/netlink/specs/netdev.yaml create mode 100644 include/uapi/linux/netdev.h create mode 100644 net/core/netdev-genl-gen.c create mode 100644 net/core/netdev-genl-gen.h create mode 100644 net/core/netdev-genl.c create mode 100644 tools/include/uapi/linux/netdev.h (limited to 'net/core/dev.c') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml new file mode 100644 index 000000000000..b4dcdae54ffd --- /dev/null +++ b/Documentation/netlink/specs/netdev.yaml @@ -0,0 +1,100 @@ +name: netdev + +doc: + netdev configuration over generic netlink. + +definitions: + - + type: flags + name: xdp-act + entries: + - + name: basic + doc: + XDP feautues set supported by all drivers + (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX) + - + name: redirect + doc: + The netdev supports XDP_REDIRECT + - + name: ndo-xmit + doc: + This feature informs if netdev implements ndo_xdp_xmit callback. + - + name: xsk-zerocopy + doc: + This feature informs if netdev supports AF_XDP in zero copy mode. + - + name: hw-offload + doc: + This feature informs if netdev supports XDP hw oflloading. + - + name: rx-sg + doc: + This feature informs if netdev implements non-linear XDP buffer + support in the driver napi callback. + - + name: ndo-xmit-sg + doc: + This feature informs if netdev implements non-linear XDP buffer + support in ndo_xdp_xmit callback. + +attribute-sets: + - + name: dev + attributes: + - + name: ifindex + doc: netdev ifindex + type: u32 + value: 1 + checks: + min: 1 + - + name: pad + type: pad + - + name: xdp-features + doc: Bitmask of enabled xdp-features. + type: u64 + enum: xdp-act + enum-as-flags: true + +operations: + list: + - + name: dev-get + doc: Get / dump information about a netdev. + value: 1 + attribute-set: dev + do: + request: + attributes: + - ifindex + reply: &dev-all + attributes: + - ifindex + - xdp-features + dump: + reply: *dev-all + - + name: dev-add-ntf + doc: Notification about device appearing. + notify: dev-get + mcgrp: mgmt + - + name: dev-del-ntf + doc: Notification about device disappearing. + notify: dev-get + mcgrp: mgmt + - + name: dev-change-ntf + doc: Notification about device configuration being changed. + notify: dev-get + mcgrp: mgmt + +mcast-groups: + list: + - + name: mgmt diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2466afa25078..0f7967591288 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -2055,6 +2056,7 @@ struct net_device { /* Read-mostly cache-line for fast-path access */ unsigned int flags; + xdp_features_t xdp_features; unsigned long long priv_flags; const struct net_device_ops *netdev_ops; const struct xdp_metadata_ops *xdp_metadata_ops; @@ -2839,6 +2841,7 @@ enum netdev_cmd { NETDEV_OFFLOAD_XSTATS_DISABLE, NETDEV_OFFLOAD_XSTATS_REPORT_USED, NETDEV_OFFLOAD_XSTATS_REPORT_DELTA, + NETDEV_XDP_FEAT_CHANGE, }; const char *netdev_cmd_to_name(enum netdev_cmd cmd); diff --git a/include/net/xdp.h b/include/net/xdp.h index 91292aa13bc0..8d1c86914f4c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -7,6 +7,7 @@ #define __LINUX_NET_XDP_H__ #include /* skb_shared_info */ +#include /** * DOC: XDP RX-queue information @@ -43,6 +44,8 @@ enum xdp_mem_type { MEM_TYPE_MAX, }; +typedef u32 xdp_features_t; + /* XDP flags for ndo_xdp_xmit */ #define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */ #define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h new file mode 100644 index 000000000000..9ee459872600 --- /dev/null +++ b/include/uapi/linux/netdev.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_NETDEV_H +#define _UAPI_LINUX_NETDEV_H + +#define NETDEV_FAMILY_NAME "netdev" +#define NETDEV_FAMILY_VERSION 1 + +/** + * enum netdev_xdp_act + * @NETDEV_XDP_ACT_BASIC: XDP feautues set supported by all drivers + * (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX) + * @NETDEV_XDP_ACT_REDIRECT: The netdev supports XDP_REDIRECT + * @NETDEV_XDP_ACT_NDO_XMIT: This feature informs if netdev implements + * ndo_xdp_xmit callback. + * @NETDEV_XDP_ACT_XSK_ZEROCOPY: This feature informs if netdev supports AF_XDP + * in zero copy mode. + * @NETDEV_XDP_ACT_HW_OFFLOAD: This feature informs if netdev supports XDP hw + * oflloading. + * @NETDEV_XDP_ACT_RX_SG: This feature informs if netdev implements non-linear + * XDP buffer support in the driver napi callback. + * @NETDEV_XDP_ACT_NDO_XMIT_SG: This feature informs if netdev implements + * non-linear XDP buffer support in ndo_xdp_xmit callback. + */ +enum netdev_xdp_act { + NETDEV_XDP_ACT_BASIC = 1, + NETDEV_XDP_ACT_REDIRECT = 2, + NETDEV_XDP_ACT_NDO_XMIT = 4, + NETDEV_XDP_ACT_XSK_ZEROCOPY = 8, + NETDEV_XDP_ACT_HW_OFFLOAD = 16, + NETDEV_XDP_ACT_RX_SG = 32, + NETDEV_XDP_ACT_NDO_XMIT_SG = 64, +}; + +enum { + NETDEV_A_DEV_IFINDEX = 1, + NETDEV_A_DEV_PAD, + NETDEV_A_DEV_XDP_FEATURES, + + __NETDEV_A_DEV_MAX, + NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) +}; + +enum { + NETDEV_CMD_DEV_GET = 1, + NETDEV_CMD_DEV_ADD_NTF, + NETDEV_CMD_DEV_DEL_NTF, + NETDEV_CMD_DEV_CHANGE_NTF, + + __NETDEV_CMD_MAX, + NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) +}; + +#define NETDEV_MCGRP_MGMT "mgmt" + +#endif /* _UAPI_LINUX_NETDEV_H */ diff --git a/net/core/Makefile b/net/core/Makefile index 10edd66a8a37..8f367813bc68 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -12,7 +12,8 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ - fib_notifier.o xdp.o flow_offload.o gro.o + fib_notifier.o xdp.o flow_offload.o gro.o \ + netdev-genl.o netdev-genl-gen.o obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o diff --git a/net/core/dev.c b/net/core/dev.c index f72f5c4ee7e2..9ac0eeb2c8cd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1614,6 +1614,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd) N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE) N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA) + N(XDP_FEAT_CHANGE) } #undef N return "UNKNOWN_NETDEV_EVENT"; diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c new file mode 100644 index 000000000000..48812ec843f5 --- /dev/null +++ b/net/core/netdev-genl-gen.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml */ +/* YNL-GEN kernel source */ + +#include +#include + +#include "netdev-genl-gen.h" + +#include + +/* NETDEV_CMD_DEV_GET - do */ +static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = { + [NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + +/* Ops table for netdev */ +static const struct genl_split_ops netdev_nl_ops[2] = { + { + .cmd = NETDEV_CMD_DEV_GET, + .doit = netdev_nl_dev_get_doit, + .policy = netdev_dev_get_nl_policy, + .maxattr = NETDEV_A_DEV_IFINDEX, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_DEV_GET, + .dumpit = netdev_nl_dev_get_dumpit, + .flags = GENL_CMD_CAP_DUMP, + }, +}; + +static const struct genl_multicast_group netdev_nl_mcgrps[] = { + [NETDEV_NLGRP_MGMT] = { "mgmt", }, +}; + +struct genl_family netdev_nl_family __ro_after_init = { + .name = NETDEV_FAMILY_NAME, + .version = NETDEV_FAMILY_VERSION, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .split_ops = netdev_nl_ops, + .n_split_ops = ARRAY_SIZE(netdev_nl_ops), + .mcgrps = netdev_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps), +}; diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h new file mode 100644 index 000000000000..b16dc7e026bb --- /dev/null +++ b/net/core/netdev-genl-gen.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_NETDEV_GEN_H +#define _LINUX_NETDEV_GEN_H + +#include +#include + +#include + +int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); + +enum { + NETDEV_NLGRP_MGMT, +}; + +extern struct genl_family netdev_nl_family; + +#endif /* _LINUX_NETDEV_GEN_H */ diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c new file mode 100644 index 000000000000..a4270fafdf11 --- /dev/null +++ b/net/core/netdev-genl.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include + +#include "netdev-genl-gen.h" + +static int +netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, + u32 portid, u32 seq, int flags, u32 cmd) +{ + void *hdr; + + hdr = genlmsg_put(rsp, portid, seq, &netdev_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || + nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, + netdev->xdp_features, NETDEV_A_DEV_PAD)) { + genlmsg_cancel(rsp, hdr); + return -EINVAL; + } + + genlmsg_end(rsp, hdr); + + return 0; +} + +static void +netdev_genl_dev_notify(struct net_device *netdev, int cmd) +{ + struct sk_buff *ntf; + + if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev), + NETDEV_NLGRP_MGMT)) + return; + + ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!ntf) + return; + + if (netdev_nl_dev_fill(netdev, ntf, 0, 0, 0, cmd)) { + nlmsg_free(ntf); + return; + } + + genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf, + 0, NETDEV_NLGRP_MGMT, GFP_KERNEL); +} + +int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *netdev; + struct sk_buff *rsp; + u32 ifindex; + int err; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); + + rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + rtnl_lock(); + + netdev = __dev_get_by_index(genl_info_net(info), ifindex); + if (netdev) + err = netdev_nl_dev_fill(netdev, rsp, info->snd_portid, + info->snd_seq, 0, info->genlhdr->cmd); + else + err = -ENODEV; + + rtnl_unlock(); + + if (err) + goto err_free_msg; + + return genlmsg_reply(rsp, info); + +err_free_msg: + nlmsg_free(rsp); + return err; +} + +int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *netdev; + int idx = 0, s_idx; + int h, s_h; + int err; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + + rtnl_lock(); + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + struct hlist_head *head; + + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(netdev, head, index_hlist) { + if (idx < s_idx) + goto cont; + err = netdev_nl_dev_fill(netdev, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + NETDEV_CMD_DEV_GET); + if (err < 0) + break; +cont: + idx++; + } + } + + rtnl_unlock(); + + if (err != -EMSGSIZE) + return err; + + cb->args[1] = idx; + cb->args[0] = h; + cb->seq = net->dev_base_seq; + + return skb->len; +} + +static int netdev_genl_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_REGISTER: + netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF); + break; + case NETDEV_UNREGISTER: + netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF); + break; + case NETDEV_XDP_FEAT_CHANGE: + netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block netdev_genl_nb = { + .notifier_call = netdev_genl_netdevice_event, +}; + +static int __init netdev_genl_init(void) +{ + int err; + + err = register_netdevice_notifier(&netdev_genl_nb); + if (err) + return err; + + err = genl_register_family(&netdev_nl_family); + if (err) + goto err_unreg_ntf; + + return 0; + +err_unreg_ntf: + unregister_netdevice_notifier(&netdev_genl_nb); + return err; +} + +subsys_initcall(netdev_genl_init); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h new file mode 100644 index 000000000000..9ee459872600 --- /dev/null +++ b/tools/include/uapi/linux/netdev.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_NETDEV_H +#define _UAPI_LINUX_NETDEV_H + +#define NETDEV_FAMILY_NAME "netdev" +#define NETDEV_FAMILY_VERSION 1 + +/** + * enum netdev_xdp_act + * @NETDEV_XDP_ACT_BASIC: XDP feautues set supported by all drivers + * (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX) + * @NETDEV_XDP_ACT_REDIRECT: The netdev supports XDP_REDIRECT + * @NETDEV_XDP_ACT_NDO_XMIT: This feature informs if netdev implements + * ndo_xdp_xmit callback. + * @NETDEV_XDP_ACT_XSK_ZEROCOPY: This feature informs if netdev supports AF_XDP + * in zero copy mode. + * @NETDEV_XDP_ACT_HW_OFFLOAD: This feature informs if netdev supports XDP hw + * oflloading. + * @NETDEV_XDP_ACT_RX_SG: This feature informs if netdev implements non-linear + * XDP buffer support in the driver napi callback. + * @NETDEV_XDP_ACT_NDO_XMIT_SG: This feature informs if netdev implements + * non-linear XDP buffer support in ndo_xdp_xmit callback. + */ +enum netdev_xdp_act { + NETDEV_XDP_ACT_BASIC = 1, + NETDEV_XDP_ACT_REDIRECT = 2, + NETDEV_XDP_ACT_NDO_XMIT = 4, + NETDEV_XDP_ACT_XSK_ZEROCOPY = 8, + NETDEV_XDP_ACT_HW_OFFLOAD = 16, + NETDEV_XDP_ACT_RX_SG = 32, + NETDEV_XDP_ACT_NDO_XMIT_SG = 64, +}; + +enum { + NETDEV_A_DEV_IFINDEX = 1, + NETDEV_A_DEV_PAD, + NETDEV_A_DEV_XDP_FEATURES, + + __NETDEV_A_DEV_MAX, + NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) +}; + +enum { + NETDEV_CMD_DEV_GET = 1, + NETDEV_CMD_DEV_ADD_NTF, + NETDEV_CMD_DEV_DEL_NTF, + NETDEV_CMD_DEV_CHANGE_NTF, + + __NETDEV_CMD_MAX, + NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) +}; + +#define NETDEV_MCGRP_MGMT "mgmt" + +#endif /* _UAPI_LINUX_NETDEV_H */ -- cgit v1.2.3 From 9b55d3f0a69af649c62cbc2633e6d695bb3cc583 Mon Sep 17 00:00:00 2001 From: Felix Riemann Date: Fri, 10 Feb 2023 13:36:44 +0100 Subject: net: Fix unwanted sign extension in netdev_stats_to_stats64() When converting net_device_stats to rtnl_link_stats64 sign extension is triggered on ILP32 machines as 6c1c509778 changed the previous "ulong -> u64" conversion to "long -> u64" by accessing the net_device_stats fields through a (signed) atomic_long_t. This causes for example the received bytes counter to jump to 16EiB after having received 2^31 bytes. Casting the atomic value to "unsigned long" beforehand converting it into u64 avoids this. Fixes: 6c1c5097781f ("net: add atomic_long_t to net_device_stats fields") Signed-off-by: Felix Riemann Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b76fb37b381e..ea0a7bac1e5c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10375,7 +10375,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) - dst[i] = atomic_long_read(&src[i]); + dst[i] = (unsigned long)atomic_long_read(&src[i]); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + n * sizeof(u64), 0, sizeof(*stats64) - n * sizeof(u64)); -- cgit v1.2.3 From 802dcbd6f30feaa7c96a1fb4ecb1db57082df9d7 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 14 Feb 2023 13:01:16 -0800 Subject: net/core: print message for allmulticast When the user sets or clears the IFF_ALLMULTI flag in the netdev, there are no log messages printed to the kernel log to indicate anything happened. This is inexplicably different from most other dev->flags changes, and could suprise the user. Typically this occurs from user-space when a user: ip link set eth0 allmulticast However, other devices like bridge set allmulticast as well, and many other flows might trigger entry into allmulticast as well. The new message uses the standard netdev_info print and looks like: [ 413.246110] ixgbe 0000:17:00.0 eth0: entered allmulticast mode [ 415.977184] ixgbe 0000:17:00.0 eth0: left allmulticast mode Signed-off-by: Jesse Brandeburg Signed-off-by: Paolo Abeni --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7307a0c15c9f..ad1e6482e1c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8391,6 +8391,8 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) } } if (dev->flags ^ old_flags) { + netdev_info(dev, "%s allmulticast mode\n", + dev->flags & IFF_ALLMULTI ? "entered" : "left"); dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); if (notify) -- cgit v1.2.3 From 3ba0bf47edf955d6f52fdb16b54acd1932cb9445 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 14 Feb 2023 13:01:17 -0800 Subject: net/core: refactor promiscuous mode message The kernel stack can be more consistent by printing the IFF_PROMISC aka promiscuous enable/disable messages with the standard netdev_info message which can include bus and driver info as well as the device. typical command usage from user space looks like: ip link set eth0 promisc But lots of utilities such as bridge, tcpdump, etc put the interface into promiscuous mode. old message: [ 406.034418] device eth0 entered promiscuous mode [ 408.424703] device eth0 left promiscuous mode new message: [ 406.034431] ice 0000:17:00.0 eth0: entered promiscuous mode [ 408.424715] ice 0000:17:00.0 eth0: left promiscuous mode Signed-off-by: Jesse Brandeburg Signed-off-by: Paolo Abeni --- net/core/dev.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ad1e6482e1c1..357081b0113c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8321,9 +8321,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) } } if (dev->flags != old_flags) { - pr_info("device %s %s promiscuous mode\n", - dev->name, - dev->flags & IFF_PROMISC ? "entered" : "left"); + netdev_info(dev, "%s promiscuous mode\n", + dev->flags & IFF_PROMISC ? "entered" : "left"); if (audit_enabled) { current_uid_gid(&uid, &gid); audit_log(audit_context(), GFP_ATOMIC, -- cgit v1.2.3 From b20b8aec6ffc07bb547966b356780cd344f20f5b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Feb 2023 09:31:39 +0200 Subject: devlink: Fix netdev notifier chain corruption Cited commit changed devlink to register its netdev notifier block on the global netdev notifier chain instead of on the per network namespace one. However, when changing the network namespace of the devlink instance, devlink still tries to unregister its notifier block from the chain of the old namespace and register it on the chain of the new namespace. This results in corruption of the notifier chains, as the same notifier block is registered on two different chains: The global one and the per network namespace one. In turn, this causes other problems such as the inability to dismantle namespaces due to netdev reference count issues. Fix by preventing devlink from moving its notifier block between namespaces. Reproducer: # echo "10 1" > /sys/bus/netdevsim/new_device # ip netns add test123 # devlink dev reload netdevsim/netdevsim10 netns test123 # ip netns del test123 [ 71.935619] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 71.938348] leaked reference. Fixes: 565b4824c39f ("devlink: change port event netdev notifier from per-net to global") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Reviewed-by: Jacob Keller Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230215073139.1360108-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 -- net/core/dev.c | 8 -------- net/core/devlink.c | 5 +---- 3 files changed, 1 insertion(+), 14 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aad12a179e54..e6e02184c25a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2839,8 +2839,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); -void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, - struct notifier_block *nb); int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); diff --git a/net/core/dev.c b/net/core/dev.c index ea0a7bac1e5c..f23e287602b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1869,14 +1869,6 @@ static void __move_netdevice_notifier_net(struct net *src_net, __register_netdevice_notifier_net(dst_net, nb, true); } -void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, - struct notifier_block *nb) -{ - rtnl_lock(); - __move_netdevice_notifier_net(src_net, dst_net, nb); - rtnl_unlock(); -} - int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) diff --git a/net/core/devlink.c b/net/core/devlink.c index 909a10e4b0dd..0bfc144df8b9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4742,11 +4742,8 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, if (err) return err; - if (dest_net && !net_eq(dest_net, curr_net)) { - move_netdevice_notifier_net(curr_net, dest_net, - &devlink->netdevice_nb); + if (dest_net && !net_eq(dest_net, curr_net)) write_pnet(&devlink->_net, dest_net); - } err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); -- cgit v1.2.3 From dd1b527831a3ed659afa01b672d8e1f7e6ca95a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Feb 2023 15:47:18 +0000 Subject: net: add location to trace_consume_skb() kfree_skb() includes the location, it makes sense to add it to consume_skb() as well. After patch: taskd_EventMana 8602 [004] 420.406239: skb:consume_skb: skbaddr=0xffff893a4a6d0500 location=unix_stream_read_generic swapper 0 [011] 422.732607: skb:consume_skb: skbaddr=0xffff89597f68cee0 location=mlx4_en_free_tx_desc discipline 9141 [043] 423.065653: skb:consume_skb: skbaddr=0xffff893a487e9c00 location=skb_consume_udp swapper 0 [010] 423.073166: skb:consume_skb: skbaddr=0xffff8949ce9cdb00 location=icmpv6_rcv borglet 8672 [014] 425.628256: skb:consume_skb: skbaddr=0xffff8949c42e9400 location=netlink_dump swapper 0 [028] 426.263317: skb:consume_skb: skbaddr=0xffff893b1589dce0 location=net_rx_action wget 14339 [009] 426.686380: skb:consume_skb: skbaddr=0xffff893a51b552e0 location=tcp_rcv_state_process Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/trace/events/skb.h | 10 ++++++---- net/core/dev.c | 2 +- net/core/skbuff.c | 8 ++++---- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 25ab1ff9423d..07e0715628ec 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -53,19 +53,21 @@ TRACE_EVENT(kfree_skb, TRACE_EVENT(consume_skb, - TP_PROTO(struct sk_buff *skb), + TP_PROTO(struct sk_buff *skb, void *location), - TP_ARGS(skb), + TP_ARGS(skb, location), TP_STRUCT__entry( - __field( void *, skbaddr ) + __field( void *, skbaddr) + __field( void *, location) ), TP_fast_assign( __entry->skbaddr = skb; + __entry->location = location; ), - TP_printk("skbaddr=%p", __entry->skbaddr) + TP_printk("skbaddr=%p location=%pS", __entry->skbaddr, __entry->location) ); TRACE_EVENT(skb_copy_datagram_iovec, diff --git a/net/core/dev.c b/net/core/dev.c index 5687b528d4c1..18dc8d75ead9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5019,7 +5019,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) WARN_ON(refcount_read(&skb->users)); if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) - trace_consume_skb(skb); + trace_consume_skb(skb, net_tx_action); else trace_kfree_skb(skb, net_tx_action, SKB_DROP_REASON_NOT_SPECIFIED); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 98ebce9f6a51..eb7d33b41e71 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -991,7 +991,7 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX); if (reason == SKB_CONSUMED) - trace_consume_skb(skb); + trace_consume_skb(skb, __builtin_return_address(0)); else trace_kfree_skb(skb, __builtin_return_address(0), reason); return true; @@ -1189,7 +1189,7 @@ void consume_skb(struct sk_buff *skb) if (!skb_unref(skb)) return; - trace_consume_skb(skb); + trace_consume_skb(skb, __builtin_return_address(0)); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); @@ -1204,7 +1204,7 @@ EXPORT_SYMBOL(consume_skb); */ void __consume_stateless_skb(struct sk_buff *skb) { - trace_consume_skb(skb); + trace_consume_skb(skb, __builtin_return_address(0)); skb_release_data(skb, SKB_CONSUMED); kfree_skbmem(skb); } @@ -1260,7 +1260,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; /* if reaching here SKB is ready to free */ - trace_consume_skb(skb); + trace_consume_skb(skb, __builtin_return_address(0)); /* if SKB is a clone, don't handle this case */ if (skb->fclone != SKB_FCLONE_UNAVAILABLE) { -- cgit v1.2.3 From ac3ad19584b26fae9ac86e4faebe790becc74491 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Feb 2023 08:38:45 +0000 Subject: net: fix __dev_kfree_skb_any() vs drop monitor dev_kfree_skb() is aliased to consume_skb(). When a driver is dropping a packet by calling dev_kfree_skb_any() we should propagate the drop reason instead of pretending the packet was consumed. Note: Now we have enum skb_drop_reason we could remove enum skb_free_reason (for linux-6.4) v2: added an unlikely(), suggested by Yunsheng Lin. Fixes: e6247027e517 ("net: introduce dev_consume_skb_any()") Signed-off-by: Eric Dumazet Cc: Yunsheng Lin Reviewed-by: Yunsheng Lin Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 18dc8d75ead9..253584777101 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3134,8 +3134,10 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) { if (in_hardirq() || irqs_disabled()) __dev_kfree_skb_irq(skb, reason); + else if (unlikely(reason == SKB_REASON_DROPPED)) + kfree_skb(skb); else - dev_kfree_skb(skb); + consume_skb(skb); } EXPORT_SYMBOL(__dev_kfree_skb_any); -- cgit v1.2.3