diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/bpf_sk_storage.c | 260 | ||||
-rw-r--r-- | net/core/dev.c | 582 | ||||
-rw-r--r-- | net/core/dev_ioctl.c | 29 | ||||
-rw-r--r-- | net/core/devlink.c | 653 | ||||
-rw-r--r-- | net/core/fib_rules.c | 31 | ||||
-rw-r--r-- | net/core/filter.c | 386 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 17 | ||||
-rw-r--r-- | net/core/flow_offload.c | 12 | ||||
-rw-r--r-- | net/core/neighbour.c | 1 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 118 | ||||
-rw-r--r-- | net/core/skbuff.c | 7 | ||||
-rw-r--r-- | net/core/sock.c | 119 | ||||
-rw-r--r-- | net/core/sock_map.c | 88 | ||||
-rw-r--r-- | net/core/tso.c | 44 | ||||
-rw-r--r-- | net/core/xdp.c | 9 |
15 files changed, 1831 insertions, 525 deletions
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index d2c4d16dadba..d3377c90a291 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -6,13 +6,12 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/bpf.h> +#include <linux/btf_ids.h> #include <net/bpf_sk_storage.h> #include <net/sock.h> #include <uapi/linux/sock_diag.h> #include <uapi/linux/btf.h> -static atomic_t cache_idx; - #define SK_STORAGE_CREATE_FLAG_MASK \ (BPF_F_NO_PREALLOC | BPF_F_CLONE) @@ -81,6 +80,9 @@ struct bpf_sk_storage_elem { #define SDATA(_SELEM) (&(_SELEM)->sdata) #define BPF_SK_STORAGE_CACHE_SIZE 16 +static DEFINE_SPINLOCK(cache_idx_lock); +static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE]; + struct bpf_sk_storage { struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE]; struct hlist_head list; /* List of bpf_sk_storage_elem */ @@ -512,6 +514,37 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map) return 0; } +static u16 cache_idx_get(void) +{ + u64 min_usage = U64_MAX; + u16 i, res = 0; + + spin_lock(&cache_idx_lock); + + for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) { + if (cache_idx_usage_counts[i] < min_usage) { + min_usage = cache_idx_usage_counts[i]; + res = i; + + /* Found a free cache_idx */ + if (!min_usage) + break; + } + } + cache_idx_usage_counts[res]++; + + spin_unlock(&cache_idx_lock); + + return res; +} + +static void cache_idx_free(u16 idx) +{ + spin_lock(&cache_idx_lock); + cache_idx_usage_counts[idx]--; + spin_unlock(&cache_idx_lock); +} + /* Called by __sk_destruct() & bpf_sk_storage_clone() */ void bpf_sk_storage_free(struct sock *sk) { @@ -560,6 +593,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) smap = (struct bpf_sk_storage_map *)map; + cache_idx_free(smap->cache_idx); + /* Note that this map might be concurrently cloned from * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone * RCU read section to finish before proceeding. New RCU @@ -673,8 +708,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) } smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size; - smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) % - BPF_SK_STORAGE_CACHE_SIZE; + smap->cache_idx = cache_idx_get(); return &smap->map; } @@ -886,6 +920,7 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) return -ENOENT; } +static int sk_storage_map_btf_id; const struct bpf_map_ops sk_storage_map_ops = { .map_alloc_check = bpf_sk_storage_map_alloc_check, .map_alloc = bpf_sk_storage_map_alloc, @@ -895,6 +930,8 @@ const struct bpf_map_ops sk_storage_map_ops = { .map_update_elem = bpf_fd_sk_storage_update_elem, .map_delete_elem = bpf_fd_sk_storage_delete_elem, .map_check_btf = bpf_sk_storage_map_check_btf, + .map_btf_name = "bpf_sk_storage_map", + .map_btf_id = &sk_storage_map_btf_id, }; const struct bpf_func_proto bpf_sk_storage_get_proto = { @@ -907,6 +944,16 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = { .arg4_type = ARG_ANYTHING, }; +const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = { + .func = bpf_sk_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */ + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + const struct bpf_func_proto bpf_sk_storage_delete_proto = { .func = bpf_sk_storage_delete, .gpl_only = false, @@ -1181,3 +1228,208 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, return err; } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put); + +struct bpf_iter_seq_sk_storage_map_info { + struct bpf_map *map; + unsigned int bucket_id; + unsigned skip_elems; +}; + +static struct bpf_sk_storage_elem * +bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, + struct bpf_sk_storage_elem *prev_selem) +{ + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_elem *selem; + u32 skip_elems = info->skip_elems; + struct bpf_sk_storage_map *smap; + u32 bucket_id = info->bucket_id; + u32 i, count, n_buckets; + struct bucket *b; + + smap = (struct bpf_sk_storage_map *)info->map; + n_buckets = 1U << smap->bucket_log; + if (bucket_id >= n_buckets) + return NULL; + + /* try to find next selem in the same bucket */ + selem = prev_selem; + count = 0; + while (selem) { + selem = hlist_entry_safe(selem->map_node.next, + struct bpf_sk_storage_elem, map_node); + if (!selem) { + /* not found, unlock and go to the next bucket */ + b = &smap->buckets[bucket_id++]; + raw_spin_unlock_bh(&b->lock); + skip_elems = 0; + break; + } + sk_storage = rcu_dereference_raw(selem->sk_storage); + if (sk_storage) { + info->skip_elems = skip_elems + count; + return selem; + } + count++; + } + + for (i = bucket_id; i < (1U << smap->bucket_log); i++) { + b = &smap->buckets[i]; + raw_spin_lock_bh(&b->lock); + count = 0; + hlist_for_each_entry(selem, &b->list, map_node) { + sk_storage = rcu_dereference_raw(selem->sk_storage); + if (sk_storage && count >= skip_elems) { + info->bucket_id = i; + info->skip_elems = count; + return selem; + } + count++; + } + raw_spin_unlock_bh(&b->lock); + skip_elems = 0; + } + + info->bucket_id = i; + info->skip_elems = 0; + return NULL; +} + +static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_sk_storage_elem *selem; + + selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL); + if (!selem) + return NULL; + + if (*pos == 0) + ++*pos; + return selem; +} + +static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v, + loff_t *pos) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + + ++*pos; + ++info->skip_elems; + return bpf_sk_storage_map_seq_find_next(seq->private, v); +} + +struct bpf_iter__bpf_sk_storage_map { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_map *, map); + __bpf_md_ptr(struct sock *, sk); + __bpf_md_ptr(void *, value); +}; + +DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta, + struct bpf_map *map, struct sock *sk, + void *value) + +static int __bpf_sk_storage_map_seq_show(struct seq_file *seq, + struct bpf_sk_storage_elem *selem) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + struct bpf_iter__bpf_sk_storage_map ctx = {}; + struct bpf_sk_storage *sk_storage; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int ret = 0; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, selem == NULL); + if (prog) { + ctx.meta = &meta; + ctx.map = info->map; + if (selem) { + sk_storage = rcu_dereference_raw(selem->sk_storage); + ctx.sk = sk_storage->sk; + ctx.value = SDATA(selem)->data; + } + ret = bpf_iter_run_prog(prog, &ctx); + } + + return ret; +} + +static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_sk_storage_map_seq_show(seq, v); +} + +static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + struct bpf_sk_storage_map *smap; + struct bucket *b; + + if (!v) { + (void)__bpf_sk_storage_map_seq_show(seq, v); + } else { + smap = (struct bpf_sk_storage_map *)info->map; + b = &smap->buckets[info->bucket_id]; + raw_spin_unlock_bh(&b->lock); + } +} + +static int bpf_iter_init_sk_storage_map(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; + + seq_info->map = aux->map; + return 0; +} + +static int bpf_iter_check_map(struct bpf_prog *prog, + struct bpf_iter_aux_info *aux) +{ + struct bpf_map *map = aux->map; + + if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) + return -EINVAL; + + if (prog->aux->max_rdonly_access > map->value_size) + return -EACCES; + + return 0; +} + +static const struct seq_operations bpf_sk_storage_map_seq_ops = { + .start = bpf_sk_storage_map_seq_start, + .next = bpf_sk_storage_map_seq_next, + .stop = bpf_sk_storage_map_seq_stop, + .show = bpf_sk_storage_map_seq_show, +}; + +static const struct bpf_iter_seq_info iter_seq_info = { + .seq_ops = &bpf_sk_storage_map_seq_ops, + .init_seq_private = bpf_iter_init_sk_storage_map, + .fini_seq_private = NULL, + .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info), +}; + +static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { + .target = "bpf_sk_storage_map", + .check_target = bpf_iter_check_map, + .req_linfo = BPF_ITER_LINK_MAP_FD, + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), + PTR_TO_BTF_ID_OR_NULL }, + { offsetof(struct bpf_iter__bpf_sk_storage_map, value), + PTR_TO_RDWR_BUF_OR_NULL }, + }, + .seq_info = &iter_seq_info, +}; + +static int __init bpf_sk_storage_map_iter_init(void) +{ + bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id = + btf_sock_ids[BTF_SOCK_TYPE_SOCK]; + return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info); +} +late_initcall(bpf_sk_storage_map_iter_init); diff --git a/net/core/dev.c b/net/core/dev.c index ba4de97b676b..7df6c9617321 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -143,6 +143,7 @@ #include <linux/net_namespace.h> #include <linux/indirect_call_wrapper.h> #include <net/devlink.h> +#include <linux/pm_runtime.h> #include "net-sysfs.h" @@ -1492,8 +1493,13 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) ASSERT_RTNL(); - if (!netif_device_present(dev)) - return -ENODEV; + if (!netif_device_present(dev)) { + /* may be detached because parent is runtime-suspended */ + if (dev->dev.parent) + pm_runtime_resume(dev->dev.parent); + if (!netif_device_present(dev)) + return -ENODEV; + } /* Block netpoll from trying to do any rx path servicing. * If we don't do this there is a chance ndo_poll_controller @@ -3448,10 +3454,9 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb, static netdev_features_t harmonize_features(struct sk_buff *skb, netdev_features_t features) { - int tmp; __be16 type; - type = skb_network_protocol(skb, &tmp); + type = skb_network_protocol(skb, NULL); features = net_mpls_features(skb, features, type); if (skb->ip_summed != CHECKSUM_NONE && @@ -5442,6 +5447,8 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) for (i = 0; i < new->aux->used_map_cnt; i++) { if (dev_map_can_have_prog(new->aux->used_maps[i])) return -EINVAL; + if (cpu_map_prog_allowed(new->aux->used_maps[i])) + return -EINVAL; } } @@ -5460,10 +5467,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) } break; - case XDP_QUERY_PROG: - xdp->prog_id = old ? old->aux->id : 0; - break; - default: ret = -EINVAL; break; @@ -5585,7 +5588,7 @@ void netif_receive_skb_list(struct list_head *head) } EXPORT_SYMBOL(netif_receive_skb_list); -DEFINE_PER_CPU(struct work_struct, flush_works); +static DEFINE_PER_CPU(struct work_struct, flush_works); /* Network device is going away, flush any packets still pending */ static void flush_backlog(struct work_struct *work) @@ -6685,7 +6688,9 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) trace_napi_poll(n, work, weight); } - WARN_ON_ONCE(work > weight); + if (unlikely(work > weight)) + pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n", + n->poll, work, weight); if (likely(work < weight)) goto out_unlock; @@ -8706,182 +8711,489 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down_generic); -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, - enum bpf_netdev_command cmd) +/** + * dev_change_proto_down_reason - proto down reason + * + * @dev: device + * @mask: proto down mask + * @value: proto down value + */ +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, + u32 value) { - struct netdev_bpf xdp; + int b; - if (!bpf_op) - return 0; + if (!mask) { + dev->proto_down_reason = value; + } else { + for_each_set_bit(b, &mask, 32) { + if (value & (1 << b)) + dev->proto_down_reason |= BIT(b); + else + dev->proto_down_reason &= ~BIT(b); + } + } +} +EXPORT_SYMBOL(dev_change_proto_down_reason); - memset(&xdp, 0, sizeof(xdp)); - xdp.command = cmd; +struct bpf_xdp_link { + struct bpf_link link; + struct net_device *dev; /* protected by rtnl_lock, no refcnt held */ + int flags; +}; - /* Query must always succeed. */ - WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG); +static enum bpf_xdp_mode dev_xdp_mode(u32 flags) +{ + if (flags & XDP_FLAGS_HW_MODE) + return XDP_MODE_HW; + if (flags & XDP_FLAGS_DRV_MODE) + return XDP_MODE_DRV; + return XDP_MODE_SKB; +} - return xdp.prog_id; +static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) +{ + switch (mode) { + case XDP_MODE_SKB: + return generic_xdp_install; + case XDP_MODE_DRV: + case XDP_MODE_HW: + return dev->netdev_ops->ndo_bpf; + default: + return NULL; + }; +} + +static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev, + enum bpf_xdp_mode mode) +{ + return dev->xdp_state[mode].link; } -static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, - struct netlink_ext_ack *extack, u32 flags, - struct bpf_prog *prog) +static struct bpf_prog *dev_xdp_prog(struct net_device *dev, + enum bpf_xdp_mode mode) +{ + struct bpf_xdp_link *link = dev_xdp_link(dev, mode); + + if (link) + return link->link.prog; + return dev->xdp_state[mode].prog; +} + +u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) +{ + struct bpf_prog *prog = dev_xdp_prog(dev, mode); + + return prog ? prog->aux->id : 0; +} + +static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode, + struct bpf_xdp_link *link) +{ + dev->xdp_state[mode].link = link; + dev->xdp_state[mode].prog = NULL; +} + +static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode, + struct bpf_prog *prog) +{ + dev->xdp_state[mode].link = NULL; + dev->xdp_state[mode].prog = prog; +} + +static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, + bpf_op_t bpf_op, struct netlink_ext_ack *extack, + u32 flags, struct bpf_prog *prog) { - bool non_hw = !(flags & XDP_FLAGS_HW_MODE); - struct bpf_prog *prev_prog = NULL; struct netdev_bpf xdp; int err; - if (non_hw) { - prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op, - XDP_QUERY_PROG)); - if (IS_ERR(prev_prog)) - prev_prog = NULL; - } - memset(&xdp, 0, sizeof(xdp)); - if (flags & XDP_FLAGS_HW_MODE) - xdp.command = XDP_SETUP_PROG_HW; - else - xdp.command = XDP_SETUP_PROG; + xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG; xdp.extack = extack; xdp.flags = flags; xdp.prog = prog; + /* Drivers assume refcnt is already incremented (i.e, prog pointer is + * "moved" into driver), so they don't increment it on their own, but + * they do decrement refcnt when program is detached or replaced. + * Given net_device also owns link/prog, we need to bump refcnt here + * to prevent drivers from underflowing it. + */ + if (prog) + bpf_prog_inc(prog); err = bpf_op(dev, &xdp); - if (!err && non_hw) - bpf_prog_change_xdp(prev_prog, prog); + if (err) { + if (prog) + bpf_prog_put(prog); + return err; + } - if (prev_prog) - bpf_prog_put(prev_prog); + if (mode != XDP_MODE_HW) + bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog); - return err; + return 0; } static void dev_xdp_uninstall(struct net_device *dev) { - struct netdev_bpf xdp; - bpf_op_t ndo_bpf; + struct bpf_xdp_link *link; + struct bpf_prog *prog; + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; - /* Remove generic XDP */ - WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL)); + ASSERT_RTNL(); - /* Remove from the driver */ - ndo_bpf = dev->netdev_ops->ndo_bpf; - if (!ndo_bpf) - return; + for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) { + prog = dev_xdp_prog(dev, mode); + if (!prog) + continue; - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG; - WARN_ON(ndo_bpf(dev, &xdp)); - if (xdp.prog_id) - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, - NULL)); + bpf_op = dev_xdp_bpf_op(dev, mode); + if (!bpf_op) + continue; - /* Remove HW offload */ - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG_HW; - if (!ndo_bpf(dev, &xdp) && xdp.prog_id) - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, - NULL)); + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); + + /* auto-detach link from net device */ + link = dev_xdp_link(dev, mode); + if (link) + link->dev = NULL; + else + bpf_prog_put(prog); + + dev_xdp_set_link(dev, mode, NULL); + } } -/** - * dev_change_xdp_fd - set or clear a bpf program for a device rx path - * @dev: device - * @extack: netlink extended ack - * @fd: new program fd or negative value to clear - * @expected_fd: old program fd that userspace expects to replace or clear - * @flags: xdp-related flags - * - * Set or clear a bpf program for a device - */ -int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, int expected_fd, u32 flags) +static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack, + struct bpf_xdp_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog, u32 flags) { - const struct net_device_ops *ops = dev->netdev_ops; - enum bpf_netdev_command query; - u32 prog_id, expected_id = 0; - bpf_op_t bpf_op, bpf_chk; - struct bpf_prog *prog; - bool offload; + struct bpf_prog *cur_prog; + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; int err; ASSERT_RTNL(); - offload = flags & XDP_FLAGS_HW_MODE; - query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; + /* either link or prog attachment, never both */ + if (link && (new_prog || old_prog)) + return -EINVAL; + /* link supports only XDP mode flags */ + if (link && (flags & ~XDP_FLAGS_MODES)) { + NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); + return -EINVAL; + } + /* just one XDP mode bit should be set, zero defaults to SKB mode */ + if (hweight32(flags & XDP_FLAGS_MODES) > 1) { + NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); + return -EINVAL; + } + /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ + if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { + NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); + return -EINVAL; + } - bpf_op = bpf_chk = ops->ndo_bpf; - if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) { - NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode"); - return -EOPNOTSUPP; + mode = dev_xdp_mode(flags); + /* can't replace attached link */ + if (dev_xdp_link(dev, mode)) { + NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link"); + return -EBUSY; } - if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) - bpf_op = generic_xdp_install; - if (bpf_op == bpf_chk) - bpf_chk = generic_xdp_install; - - prog_id = __dev_xdp_query(dev, bpf_op, query); - if (flags & XDP_FLAGS_REPLACE) { - if (expected_fd >= 0) { - prog = bpf_prog_get_type_dev(expected_fd, - BPF_PROG_TYPE_XDP, - bpf_op == ops->ndo_bpf); - if (IS_ERR(prog)) - return PTR_ERR(prog); - expected_id = prog->aux->id; - bpf_prog_put(prog); - } - if (prog_id != expected_id) { - NL_SET_ERR_MSG(extack, "Active program does not match expected"); - return -EEXIST; - } + cur_prog = dev_xdp_prog(dev, mode); + /* can't replace attached prog with link */ + if (link && cur_prog) { + NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link"); + return -EBUSY; + } + if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) { + NL_SET_ERR_MSG(extack, "Active program does not match expected"); + return -EEXIST; + } + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { + NL_SET_ERR_MSG(extack, "XDP program already attached"); + return -EBUSY; } - if (fd >= 0) { - if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) { - NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); - return -EEXIST; - } - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) { - NL_SET_ERR_MSG(extack, "XDP program already attached"); - return -EBUSY; - } + /* put effective new program into new_prog */ + if (link) + new_prog = link->link.prog; - prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, - bpf_op == ops->ndo_bpf); - if (IS_ERR(prog)) - return PTR_ERR(prog); + if (new_prog) { + bool offload = mode == XDP_MODE_HW; + enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB + ? XDP_MODE_DRV : XDP_MODE_SKB; - if (!offload && bpf_prog_is_dev_bound(prog->aux)) { - NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); - bpf_prog_put(prog); + if (!offload && dev_xdp_prog(dev, other_mode)) { + NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); + return -EEXIST; + } + if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) { + NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported"); return -EINVAL; } - - if (prog->expected_attach_type == BPF_XDP_DEVMAP) { + if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); - bpf_prog_put(prog); return -EINVAL; } + if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) { + NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device"); + return -EINVAL; + } + } - /* prog->aux->id may be 0 for orphaned device-bound progs */ - if (prog->aux->id && prog->aux->id == prog_id) { - bpf_prog_put(prog); - return 0; + /* don't call drivers if the effective program didn't change */ + if (new_prog != cur_prog) { + bpf_op = dev_xdp_bpf_op(dev, mode); + if (!bpf_op) { + NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode"); + return -EOPNOTSUPP; + } + + err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog); + if (err) + return err; + } + + if (link) + dev_xdp_set_link(dev, mode, link); + else + dev_xdp_set_prog(dev, mode, new_prog); + if (cur_prog) + bpf_prog_put(cur_prog); + + return 0; +} + +static int dev_xdp_attach_link(struct net_device *dev, + struct netlink_ext_ack *extack, + struct bpf_xdp_link *link) +{ + return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags); +} + +static int dev_xdp_detach_link(struct net_device *dev, + struct netlink_ext_ack *extack, + struct bpf_xdp_link *link) +{ + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; + + ASSERT_RTNL(); + + mode = dev_xdp_mode(link->flags); + if (dev_xdp_link(dev, mode) != link) + return -EINVAL; + + bpf_op = dev_xdp_bpf_op(dev, mode); + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); + dev_xdp_set_link(dev, mode, NULL); + return 0; +} + +static void bpf_xdp_link_release(struct bpf_link *link) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + + rtnl_lock(); + + /* if racing with net_device's tear down, xdp_link->dev might be + * already NULL, in which case link was already auto-detached + */ + if (xdp_link->dev) { + WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); + xdp_link->dev = NULL; + } + + rtnl_unlock(); +} + +static int bpf_xdp_link_detach(struct bpf_link *link) +{ + bpf_xdp_link_release(link); + return 0; +} + +static void bpf_xdp_link_dealloc(struct bpf_link *link) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + + kfree(xdp_link); +} + +static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + u32 ifindex = 0; + + rtnl_lock(); + if (xdp_link->dev) + ifindex = xdp_link->dev->ifindex; + rtnl_unlock(); + + seq_printf(seq, "ifindex:\t%u\n", ifindex); +} + +static int bpf_xdp_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + u32 ifindex = 0; + + rtnl_lock(); + if (xdp_link->dev) + ifindex = xdp_link->dev->ifindex; + rtnl_unlock(); + + info->xdp.ifindex = ifindex; + return 0; +} + +static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; + int err = 0; + + rtnl_lock(); + + /* link might have been auto-released already, so fail */ + if (!xdp_link->dev) { + err = -ENOLINK; + goto out_unlock; + } + + if (old_prog && link->prog != old_prog) { + err = -EPERM; + goto out_unlock; + } + old_prog = link->prog; + if (old_prog == new_prog) { + /* no-op, don't disturb drivers */ + bpf_prog_put(new_prog); + goto out_unlock; + } + + mode = dev_xdp_mode(xdp_link->flags); + bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); + err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, + xdp_link->flags, new_prog); + if (err) + goto out_unlock; + + old_prog = xchg(&link->prog, new_prog); + bpf_prog_put(old_prog); + +out_unlock: + rtnl_unlock(); + return err; +} + +static const struct bpf_link_ops bpf_xdp_link_lops = { + .release = bpf_xdp_link_release, + .dealloc = bpf_xdp_link_dealloc, + .detach = bpf_xdp_link_detach, + .show_fdinfo = bpf_xdp_link_show_fdinfo, + .fill_link_info = bpf_xdp_link_fill_link_info, + .update_prog = bpf_xdp_link_update, +}; + +int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct net *net = current->nsproxy->net_ns; + struct bpf_link_primer link_primer; + struct bpf_xdp_link *link; + struct net_device *dev; + int err, fd; + + dev = dev_get_by_index(net, attr->link_create.target_ifindex); + if (!dev) + return -EINVAL; + + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto out_put_dev; + } + + bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); + link->dev = dev; + link->flags = attr->link_create.flags; + + err = bpf_link_prime(&link->link, &link_primer); + if (err) { + kfree(link); + goto out_put_dev; + } + + rtnl_lock(); + err = dev_xdp_attach_link(dev, NULL, link); + rtnl_unlock(); + + if (err) { + bpf_link_cleanup(&link_primer); + goto out_put_dev; + } + + fd = bpf_link_settle(&link_primer); + /* link itself doesn't hold dev's refcnt to not complicate shutdown */ + dev_put(dev); + return fd; + +out_put_dev: + dev_put(dev); + return err; +} + +/** + * dev_change_xdp_fd - set or clear a bpf program for a device rx path + * @dev: device + * @extack: netlink extended ack + * @fd: new program fd or negative value to clear + * @expected_fd: old program fd that userspace expects to replace or clear + * @flags: xdp-related flags + * + * Set or clear a bpf program for a device + */ +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, int expected_fd, u32 flags) +{ + enum bpf_xdp_mode mode = dev_xdp_mode(flags); + struct bpf_prog *new_prog = NULL, *old_prog = NULL; + int err; + + ASSERT_RTNL(); + + if (fd >= 0) { + new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, + mode != XDP_MODE_SKB); + if (IS_ERR(new_prog)) + return PTR_ERR(new_prog); + } + + if (expected_fd >= 0) { + old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP, + mode != XDP_MODE_SKB); + if (IS_ERR(old_prog)) { + err = PTR_ERR(old_prog); + old_prog = NULL; + goto err_out; } - } else { - if (!prog_id) - return 0; - prog = NULL; } - err = dev_xdp_install(dev, bpf_op, extack, flags, prog); - if (err < 0 && prog) - bpf_prog_put(prog); + err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags); +err_out: + if (err && new_prog) + bpf_prog_put(new_prog); + if (old_prog) + bpf_prog_put(old_prog); return err; } diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 547b587c1950..b2cf9b7bb7b8 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -5,6 +5,7 @@ #include <linux/rtnetlink.h> #include <linux/net_tstamp.h> #include <linux/wireless.h> +#include <net/dsa.h> #include <net/wext.h> /* @@ -225,6 +226,26 @@ static int net_hwtstamp_validate(struct ifreq *ifr) return 0; } +static int dev_do_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int err = -EOPNOTSUPP; + + err = dsa_ndo_do_ioctl(dev, ifr, cmd); + if (err == 0 || err != -EOPNOTSUPP) + return err; + + if (ops->ndo_do_ioctl) { + if (netif_device_present(dev)) + err = ops->ndo_do_ioctl(dev, ifr, cmd); + else + err = -ENODEV; + } + + return err; +} + /* * Perform the SIOCxIFxxx calls, inside rtnl_lock() */ @@ -323,13 +344,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCSHWTSTAMP || cmd == SIOCGHWTSTAMP || cmd == SIOCWANDEV) { - err = -EOPNOTSUPP; - if (ops->ndo_do_ioctl) { - if (netif_device_present(dev)) - err = ops->ndo_do_ioctl(dev, ifr, cmd); - else - err = -ENODEV; - } + err = dev_do_ioctl(dev, ifr, cmd); } else err = -EINVAL; diff --git a/net/core/devlink.c b/net/core/devlink.c index 47f14a2f25fb..e674f0f46dc2 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -85,6 +85,10 @@ EXPORT_SYMBOL(devlink_dpipe_header_ipv6); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr); +static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { + [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY }, +}; + static LIST_HEAD(devlink_list); /* devlink_mutex @@ -382,19 +386,19 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) return NULL; } -#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) -#define DEVLINK_NL_FLAG_NEED_PORT BIT(1) -#define DEVLINK_NL_FLAG_NEED_SB BIT(2) +#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) +#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global * devlink lock is taken and protects from disruption by user-calls. */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(3) +#define DEVLINK_NL_FLAG_NO_LOCK BIT(2) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { + struct devlink_port *devlink_port; struct devlink *devlink; int err; @@ -406,27 +410,18 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, } if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_lock(&devlink->lock); - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { - info->user_ptr[0] = devlink; - } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { - struct devlink_port *devlink_port; - + info->user_ptr[0] = devlink; + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { devlink_port = devlink_port_get_from_info(devlink, info); if (IS_ERR(devlink_port)) { err = PTR_ERR(devlink_port); goto unlock; } - info->user_ptr[0] = devlink_port; - } - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) { - struct devlink_sb *devlink_sb; - - devlink_sb = devlink_sb_get_from_info(devlink, info); - if (IS_ERR(devlink_sb)) { - err = PTR_ERR(devlink_sb); - goto unlock; - } - info->user_ptr[1] = devlink_sb; + info->user_ptr[1] = devlink_port; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) { + devlink_port = devlink_port_get_from_info(devlink, info); + if (!IS_ERR(devlink_port)) + info->user_ptr[1] = devlink_port; } return 0; @@ -442,16 +437,8 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, { struct devlink *devlink; - /* When devlink changes netns, it would not be found - * by devlink_get_from_info(). So try if it is stored first. - */ - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { - devlink = info->user_ptr[0]; - } else { - devlink = devlink_get_from_info(info); - WARN_ON(IS_ERR(devlink)); - } - if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) + devlink = info->user_ptr[0]; + if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); } @@ -524,8 +511,14 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, { struct devlink_port_attrs *attrs = &devlink_port->attrs; - if (!attrs->set) + if (!devlink_port->attrs_set) return 0; + if (attrs->lanes) { + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes)) + return -EMSGSIZE; + } + if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable)) + return -EMSGSIZE; if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) return -EMSGSIZE; switch (devlink_port->attrs.flavour) { @@ -563,10 +556,54 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; } +static int +devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, + struct netlink_ext_ack *extack) +{ + struct devlink *devlink = port->devlink; + const struct devlink_ops *ops; + struct nlattr *function_attr; + bool empty_nest = true; + int err = 0; + + function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION); + if (!function_attr) + return -EMSGSIZE; + + ops = devlink->ops; + if (ops->port_function_hw_addr_get) { + int hw_addr_len; + u8 hw_addr[MAX_ADDR_LEN]; + + err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); + if (err == -EOPNOTSUPP) { + /* Port function attributes are optional for a port. If port doesn't + * support function attribute, returning -EOPNOTSUPP is not an error. + */ + err = 0; + goto out; + } else if (err) { + goto out; + } + err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr); + if (err) + goto out; + empty_nest = false; + } + +out: + if (err || empty_nest) + nla_nest_cancel(msg, function_attr); + else + nla_nest_end(msg, function_attr); + return err; +} + static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_port *devlink_port, enum devlink_command cmd, u32 portid, - u32 seq, int flags) + u32 seq, int flags, + struct netlink_ext_ack *extack) { void *hdr; @@ -607,6 +644,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, spin_unlock_bh(&devlink_port->type_lock); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; + if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack)) + goto nla_put_failure; genlmsg_end(msg, hdr); return 0; @@ -634,7 +673,8 @@ static void devlink_port_notify(struct devlink_port *devlink_port, if (!msg) return; - err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0); + err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0, + NULL); if (err) { nlmsg_free(msg); return; @@ -697,7 +737,7 @@ out: static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; int err; @@ -708,7 +748,8 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, err = devlink_nl_port_fill(msg, devlink, devlink_port, DEVLINK_CMD_PORT_NEW, - info->snd_portid, info->snd_seq, 0); + info->snd_portid, info->snd_seq, 0, + info->extack); if (err) { nlmsg_free(msg); return err; @@ -740,7 +781,8 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI); + NLM_F_MULTI, + cb->extack); if (err) { mutex_unlock(&devlink->lock); goto out; @@ -778,10 +820,71 @@ static int devlink_port_type_set(struct devlink *devlink, return -EOPNOTSUPP; } +static int +devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port, + const struct nlattr *attr, struct netlink_ext_ack *extack) +{ + const struct devlink_ops *ops; + const u8 *hw_addr; + int hw_addr_len; + int err; + + hw_addr = nla_data(attr); + hw_addr_len = nla_len(attr); + if (hw_addr_len > MAX_ADDR_LEN) { + NL_SET_ERR_MSG_MOD(extack, "Port function hardware address too long"); + return -EINVAL; + } + if (port->type == DEVLINK_PORT_TYPE_ETH) { + if (hw_addr_len != ETH_ALEN) { + NL_SET_ERR_MSG_MOD(extack, "Address must be 6 bytes for Ethernet device"); + return -EINVAL; + } + if (!is_unicast_ether_addr(hw_addr)) { + NL_SET_ERR_MSG_MOD(extack, "Non-unicast hardware address unsupported"); + return -EINVAL; + } + } + + ops = devlink->ops; + if (!ops->port_function_hw_addr_set) { + NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes"); + return -EOPNOTSUPP; + } + + err = ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); + if (err) + return err; + + devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); + return 0; +} + +static int +devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, + const struct nlattr *attr, struct netlink_ext_ack *extack) +{ + struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1]; + int err; + + err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr, + devlink_function_nl_policy, extack); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Fail to parse port function attributes"); + return err; + } + + attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]; + if (attr) + err = devlink_port_function_hw_addr_set(devlink, port, attr, extack); + + return err; +} + static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = devlink_port->devlink; int err; @@ -793,6 +896,16 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, if (err) return err; } + + if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) { + struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION]; + struct netlink_ext_ack *extack = info->extack; + + err = devlink_port_function_set(devlink, devlink_port, attr, extack); + if (err) + return err; + } + return 0; } @@ -810,6 +923,7 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; + struct devlink_port *devlink_port; u32 port_index; u32 count; @@ -817,8 +931,27 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]) return -EINVAL; + devlink_port = devlink_port_get_from_info(devlink, info); port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]); + + if (IS_ERR(devlink_port)) + return -EINVAL; + + if (!devlink_port->attrs.splittable) { + /* Split ports cannot be split. */ + if (devlink_port->attrs.split) + NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split further"); + else + NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split"); + return -EINVAL; + } + + if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) { + NL_SET_ERR_MSG_MOD(info->extack, "Invalid split count"); + return -EINVAL; + } + return devlink_port_split(devlink, port_index, count, info->extack); } @@ -886,10 +1019,14 @@ static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_sb *devlink_sb; struct sk_buff *msg; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -991,11 +1128,15 @@ static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_sb *devlink_sb; struct sk_buff *msg; u16 pool_index; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) @@ -1102,12 +1243,16 @@ static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; enum devlink_sb_threshold_type threshold_type; + struct devlink_sb *devlink_sb; u16 pool_index; u32 size; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) @@ -1186,13 +1331,17 @@ nla_put_failure: static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = devlink_port->devlink; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_sb *devlink_sb; struct sk_buff *msg; u16 pool_index; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) @@ -1304,12 +1453,17 @@ static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_port *devlink_port = info->user_ptr[1]; + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb; u16 pool_index; u32 threshold; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) @@ -1391,14 +1545,18 @@ nla_put_failure: static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = devlink_port->devlink; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_sb *devlink_sb; struct sk_buff *msg; enum devlink_sb_pool_type pool_type; u16 tc_index; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_type_get_from_info(info, &pool_type); if (err) return err; @@ -1540,14 +1698,19 @@ static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_port *devlink_port = info->user_ptr[1]; + struct devlink *devlink = info->user_ptr[0]; enum devlink_sb_pool_type pool_type; + struct devlink_sb *devlink_sb; u16 tc_index; u16 pool_index; u32 threshold; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_type_get_from_info(info, &pool_type); if (err) return err; @@ -1575,8 +1738,12 @@ static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; const struct devlink_ops *ops = devlink->ops; + struct devlink_sb *devlink_sb; + + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); if (ops->sb_occ_snapshot) return ops->sb_occ_snapshot(devlink, devlink_sb->index); @@ -1587,8 +1754,12 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; const struct devlink_ops *ops = devlink->ops; + struct devlink_sb *devlink_sb; + + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); if (ops->sb_occ_max_clear) return ops->sb_occ_max_clear(devlink, devlink_sb->index); @@ -2772,7 +2943,7 @@ static void devlink_reload_netns_change(struct devlink *devlink, DEVLINK_CMD_PARAM_NEW); } -static bool devlink_reload_supported(struct devlink *devlink) +static bool devlink_reload_supported(const struct devlink *devlink) { return devlink->ops->reload_down && devlink->ops->reload_up; } @@ -2818,7 +2989,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) struct net *dest_net = NULL; int err; - if (!devlink_reload_supported(devlink) || !devlink->reload_enabled) + if (!devlink_reload_supported(devlink)) return -EOPNOTSUPP; err = devlink_resources_validate(devlink, NULL, info); @@ -4388,6 +4559,14 @@ int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) } EXPORT_SYMBOL_GPL(devlink_info_serial_number_put); +int devlink_info_board_serial_number_put(struct devlink_info_req *req, + const char *bsn) +{ + return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, + bsn); +} +EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put); + static int devlink_info_version_put(struct devlink_info_req *req, int attr, const char *version_name, const char *version_value) @@ -5141,6 +5320,7 @@ struct devlink_health_reporter { void *priv; const struct devlink_health_reporter_ops *ops; struct devlink *devlink; + struct devlink_port *devlink_port; struct devlink_fmsg *dump_fmsg; struct mutex dump_lock; /* lock parallel read/write from dump buffers */ u64 graceful_period; @@ -5163,18 +5343,98 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter) EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); static struct devlink_health_reporter * -devlink_health_reporter_find_by_name(struct devlink *devlink, - const char *reporter_name) +__devlink_health_reporter_find_by_name(struct list_head *reporter_list, + struct mutex *list_lock, + const char *reporter_name) { struct devlink_health_reporter *reporter; - lockdep_assert_held(&devlink->reporters_lock); - list_for_each_entry(reporter, &devlink->reporter_list, list) + lockdep_assert_held(list_lock); + list_for_each_entry(reporter, reporter_list, list) if (!strcmp(reporter->ops->name, reporter_name)) return reporter; return NULL; } +static struct devlink_health_reporter * +devlink_health_reporter_find_by_name(struct devlink *devlink, + const char *reporter_name) +{ + return __devlink_health_reporter_find_by_name(&devlink->reporter_list, + &devlink->reporters_lock, + reporter_name); +} + +static struct devlink_health_reporter * +devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port, + const char *reporter_name) +{ + return __devlink_health_reporter_find_by_name(&devlink_port->reporter_list, + &devlink_port->reporters_lock, + reporter_name); +} + +static struct devlink_health_reporter * +__devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + + if (WARN_ON(graceful_period && !ops->recover)) + return ERR_PTR(-EINVAL); + + reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); + if (!reporter) + return ERR_PTR(-ENOMEM); + + reporter->priv = priv; + reporter->ops = ops; + reporter->devlink = devlink; + reporter->graceful_period = graceful_period; + reporter->auto_recover = !!ops->recover; + reporter->auto_dump = !!ops->dump; + mutex_init(&reporter->dump_lock); + refcount_set(&reporter->refcount, 1); + return reporter; +} + +/** + * devlink_port_health_reporter_create - create devlink health reporter for + * specified port instance + * + * @port: devlink_port which should contain the new reporter + * @ops: ops + * @graceful_period: to avoid recovery loops, in msecs + * @priv: priv + */ +struct devlink_health_reporter * +devlink_port_health_reporter_create(struct devlink_port *port, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + + mutex_lock(&port->reporters_lock); + if (__devlink_health_reporter_find_by_name(&port->reporter_list, + &port->reporters_lock, ops->name)) { + reporter = ERR_PTR(-EEXIST); + goto unlock; + } + + reporter = __devlink_health_reporter_create(port->devlink, ops, + graceful_period, priv); + if (IS_ERR(reporter)) + goto unlock; + + reporter->devlink_port = port; + list_add_tail(&reporter->list, &port->reporter_list); +unlock: + mutex_unlock(&port->reporters_lock); + return reporter; +} +EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create); + /** * devlink_health_reporter_create - create devlink health reporter * @@ -5196,25 +5456,11 @@ devlink_health_reporter_create(struct devlink *devlink, goto unlock; } - if (WARN_ON(graceful_period && !ops->recover)) { - reporter = ERR_PTR(-EINVAL); - goto unlock; - } - - reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); - if (!reporter) { - reporter = ERR_PTR(-ENOMEM); + reporter = __devlink_health_reporter_create(devlink, ops, + graceful_period, priv); + if (IS_ERR(reporter)) goto unlock; - } - reporter->priv = priv; - reporter->ops = ops; - reporter->devlink = devlink; - reporter->graceful_period = graceful_period; - reporter->auto_recover = !!ops->recover; - reporter->auto_dump = !!ops->dump; - mutex_init(&reporter->dump_lock); - refcount_set(&reporter->refcount, 1); list_add_tail(&reporter->list, &devlink->reporter_list); unlock: mutex_unlock(&devlink->reporters_lock); @@ -5222,6 +5468,29 @@ unlock: } EXPORT_SYMBOL_GPL(devlink_health_reporter_create); +static void +devlink_health_reporter_free(struct devlink_health_reporter *reporter) +{ + mutex_destroy(&reporter->dump_lock); + if (reporter->dump_fmsg) + devlink_fmsg_free(reporter->dump_fmsg); + kfree(reporter); +} + +static void +devlink_health_reporter_put(struct devlink_health_reporter *reporter) +{ + if (refcount_dec_and_test(&reporter->refcount)) + devlink_health_reporter_free(reporter); +} + +static void +__devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + list_del(&reporter->list); + devlink_health_reporter_put(reporter); +} + /** * devlink_health_reporter_destroy - destroy devlink health reporter * @@ -5230,18 +5499,30 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create); void devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) { - mutex_lock(&reporter->devlink->reporters_lock); - list_del(&reporter->list); - mutex_unlock(&reporter->devlink->reporters_lock); - while (refcount_read(&reporter->refcount) > 1) - msleep(100); - mutex_destroy(&reporter->dump_lock); - if (reporter->dump_fmsg) - devlink_fmsg_free(reporter->dump_fmsg); - kfree(reporter); + struct mutex *lock = &reporter->devlink->reporters_lock; + + mutex_lock(lock); + __devlink_health_reporter_destroy(reporter); + mutex_unlock(lock); } EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); +/** + * devlink_port_health_reporter_destroy - destroy devlink port health reporter + * + * @reporter: devlink health reporter to destroy + */ +void +devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + struct mutex *lock = &reporter->devlink_port->reporters_lock; + + mutex_lock(lock); + __devlink_health_reporter_destroy(reporter); + mutex_unlock(lock); +} +EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy); + static int devlink_nl_health_reporter_fill(struct sk_buff *msg, struct devlink *devlink, @@ -5259,6 +5540,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, if (devlink_nl_put_handle(msg, devlink)) goto genlmsg_cancel; + if (reporter->devlink_port) { + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, reporter->devlink_port->index)) + goto genlmsg_cancel; + } reporter_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_HEALTH_REPORTER); if (!reporter_attr) @@ -5466,17 +5751,28 @@ devlink_health_reporter_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) { struct devlink_health_reporter *reporter; + struct devlink_port *devlink_port; char *reporter_name; if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) return NULL; reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); - mutex_lock(&devlink->reporters_lock); - reporter = devlink_health_reporter_find_by_name(devlink, reporter_name); - if (reporter) - refcount_inc(&reporter->refcount); - mutex_unlock(&devlink->reporters_lock); + devlink_port = devlink_port_get_from_attrs(devlink, attrs); + if (IS_ERR(devlink_port)) { + mutex_lock(&devlink->reporters_lock); + reporter = devlink_health_reporter_find_by_name(devlink, reporter_name); + if (reporter) + refcount_inc(&reporter->refcount); + mutex_unlock(&devlink->reporters_lock); + } else { + mutex_lock(&devlink_port->reporters_lock); + reporter = devlink_port_health_reporter_find_by_name(devlink_port, reporter_name); + if (reporter) + refcount_inc(&reporter->refcount); + mutex_unlock(&devlink_port->reporters_lock); + } + return reporter; } @@ -5508,12 +5804,6 @@ unlock: return NULL; } -static void -devlink_health_reporter_put(struct devlink_health_reporter *reporter) -{ - refcount_dec(&reporter->refcount); -} - void devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state state) @@ -5570,6 +5860,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct devlink_health_reporter *reporter; + struct devlink_port *port; struct devlink *devlink; int start = cb->args[0]; int idx = 0; @@ -5600,6 +5891,31 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, } mutex_unlock(&devlink->reporters_lock); } + + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + list_for_each_entry(port, &devlink->port_list, list) { + mutex_lock(&port->reporters_lock); + list_for_each_entry(reporter, &port->reporter_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_health_reporter_fill(msg, devlink, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&port->reporters_lock); + goto out; + } + idx++; + } + mutex_unlock(&port->reporters_lock); + } + } out: mutex_unlock(&devlink_mutex); @@ -6107,7 +6423,7 @@ static int __devlink_trap_action_set(struct devlink *devlink, } err = devlink->ops->trap_action_set(devlink, trap_item->trap, - trap_action); + trap_action, extack); if (err) return err; @@ -6397,7 +6713,8 @@ static int devlink_trap_group_set(struct devlink *devlink, } policer = policer_item ? policer_item->policer : NULL; - err = devlink->ops->trap_group_set(devlink, group_item->group, policer); + err = devlink->ops->trap_group_set(devlink, group_item->group, policer, + extack); if (err) return err; @@ -6721,6 +7038,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 }, [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 }, [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 }, + [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -6729,7 +7047,6 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_get_doit, .dumpit = devlink_nl_cmd_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -6752,24 +7069,20 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_split_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PORT_UNSPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_unsplit_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_get_doit, .dumpit = devlink_nl_cmd_sb_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ }, { @@ -6777,8 +7090,6 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_pool_get_doit, .dumpit = devlink_nl_cmd_sb_pool_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ }, { @@ -6786,16 +7097,13 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_pool_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_port_pool_get_doit, .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | - DEVLINK_NL_FLAG_NEED_SB, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { @@ -6803,16 +7111,14 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_port_pool_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | - DEVLINK_NL_FLAG_NEED_SB, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit, .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | - DEVLINK_NL_FLAG_NEED_SB, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { @@ -6820,60 +7126,50 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | - DEVLINK_NL_FLAG_NEED_SB, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_occ_snapshot_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_occ_max_clear_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_ESWITCH_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_get_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_ESWITCH_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_table_get, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_entries_get, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_headers_get, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -6881,20 +7177,17 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_table_counters_set, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_resource_set, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_resource_dump, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -6902,15 +7195,13 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_reload, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PARAM_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_param_get_doit, .dumpit = devlink_nl_cmd_param_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -6918,7 +7209,6 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_param_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_PORT_PARAM_GET, @@ -6941,21 +7231,18 @@ static const struct genl_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_region_get_doit, .dumpit = devlink_nl_cmd_region_get_dumpit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_NEW, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_region_new, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_region_del, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_READ, @@ -6963,14 +7250,12 @@ static const struct genl_ops devlink_nl_ops[] = { GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_cmd_region_read_dumpit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_INFO_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_info_get_doit, .dumpit = devlink_nl_cmd_info_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -6978,7 +7263,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, /* can be retrieved by unprivileged users */ }, @@ -6987,7 +7272,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -6995,7 +7280,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_recover_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7003,7 +7288,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7012,7 +7297,7 @@ static const struct genl_ops devlink_nl_ops[] = { GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7020,7 +7305,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7028,46 +7313,39 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_flash_update, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_TRAP_GET, .doit = devlink_nl_cmd_trap_get_doit, .dumpit = devlink_nl_cmd_trap_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_TRAP_SET, .doit = devlink_nl_cmd_trap_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_TRAP_GROUP_GET, .doit = devlink_nl_cmd_trap_group_get_doit, .dumpit = devlink_nl_cmd_trap_group_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_TRAP_GROUP_SET, .doit = devlink_nl_cmd_trap_group_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_TRAP_POLICER_GET, .doit = devlink_nl_cmd_trap_policer_get_doit, .dumpit = devlink_nl_cmd_trap_policer_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_TRAP_POLICER_SET, .doit = devlink_nl_cmd_trap_policer_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, }; @@ -7132,9 +7410,9 @@ EXPORT_SYMBOL_GPL(devlink_alloc); */ int devlink_register(struct devlink *devlink, struct device *dev) { - mutex_lock(&devlink_mutex); devlink->dev = dev; devlink->registered = true; + mutex_lock(&devlink_mutex); list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); mutex_unlock(&devlink_mutex); @@ -7280,6 +7558,8 @@ int devlink_port_register(struct devlink *devlink, list_add_tail(&devlink_port->list, &devlink->port_list); INIT_LIST_HEAD(&devlink_port->param_list); mutex_unlock(&devlink->lock); + INIT_LIST_HEAD(&devlink_port->reporter_list); + mutex_init(&devlink_port->reporters_lock); INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn); devlink_port_type_warn_schedule(devlink_port); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); @@ -7296,6 +7576,8 @@ void devlink_port_unregister(struct devlink_port *devlink_port) { struct devlink *devlink = devlink_port->devlink; + WARN_ON(!list_empty(&devlink_port->reporter_list)); + mutex_destroy(&devlink_port->reporters_lock); devlink_port_type_warn_cancel(devlink_port); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); mutex_lock(&devlink->lock); @@ -7389,24 +7671,20 @@ void devlink_port_type_clear(struct devlink_port *devlink_port) EXPORT_SYMBOL_GPL(devlink_port_type_clear); static int __devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - const unsigned char *switch_id, - unsigned char switch_id_len) + enum devlink_port_flavour flavour) { struct devlink_port_attrs *attrs = &devlink_port->attrs; if (WARN_ON(devlink_port->registered)) return -EEXIST; - attrs->set = true; + devlink_port->attrs_set = true; attrs->flavour = flavour; - if (switch_id) { - attrs->switch_port = true; - if (WARN_ON(switch_id_len > MAX_PHYS_ITEM_ID_LEN)) - switch_id_len = MAX_PHYS_ITEM_ID_LEN; - memcpy(attrs->switch_id.id, switch_id, switch_id_len); - attrs->switch_id.id_len = switch_id_len; + if (attrs->switch_id.id_len) { + devlink_port->switch_port = true; + if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN)) + attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN; } else { - attrs->switch_port = false; + devlink_port->switch_port = false; } return 0; } @@ -7415,33 +7693,18 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port, * devlink_port_attrs_set - Set port attributes * * @devlink_port: devlink port - * @flavour: flavour of the port - * @port_number: number of the port that is facing user, for example - * the front panel port number - * @split: indicates if this is split port - * @split_subport_number: if the port is split, this is the number - * of subport. - * @switch_id: if the port is part of switch, this is buffer with ID, - * otwerwise this is NULL - * @switch_id_len: length of the switch_id buffer + * @attrs: devlink port attrs */ void devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - u32 port_number, bool split, - u32 split_subport_number, - const unsigned char *switch_id, - unsigned char switch_id_len) + struct devlink_port_attrs *attrs) { - struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - ret = __devlink_port_attrs_set(devlink_port, flavour, - switch_id, switch_id_len); + devlink_port->attrs = *attrs; + ret = __devlink_port_attrs_set(devlink_port, attrs->flavour); if (ret) return; - attrs->split = split; - attrs->phys.port_number = port_number; - attrs->phys.split_subport_number = split_subport_number; + WARN_ON(attrs->splittable && attrs->split); } EXPORT_SYMBOL_GPL(devlink_port_attrs_set); @@ -7450,20 +7713,14 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_set); * * @devlink_port: devlink port * @pf: associated PF for the devlink port instance - * @switch_id: if the port is part of switch, this is buffer with ID, - * otherwise this is NULL - * @switch_id_len: length of the switch_id buffer */ -void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, - const unsigned char *switch_id, - unsigned char switch_id_len, u16 pf) +void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf) { struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; ret = __devlink_port_attrs_set(devlink_port, - DEVLINK_PORT_FLAVOUR_PCI_PF, - switch_id, switch_id_len); + DEVLINK_PORT_FLAVOUR_PCI_PF); if (ret) return; @@ -7477,21 +7734,15 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set); * @devlink_port: devlink port * @pf: associated PF for the devlink port instance * @vf: associated VF of a PF for the devlink port instance - * @switch_id: if the port is part of switch, this is buffer with ID, - * otherwise this is NULL - * @switch_id_len: length of the switch_id buffer */ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, - const unsigned char *switch_id, - unsigned char switch_id_len, u16 pf, u16 vf) { struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; ret = __devlink_port_attrs_set(devlink_port, - DEVLINK_PORT_FLAVOUR_PCI_VF, - switch_id, switch_id_len); + DEVLINK_PORT_FLAVOUR_PCI_VF); if (ret) return; attrs->pci_vf.pf = pf; @@ -7505,7 +7756,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, struct devlink_port_attrs *attrs = &devlink_port->attrs; int n = 0; - if (!attrs->set) + if (!devlink_port->attrs_set) return -EOPNOTSUPP; switch (attrs->flavour) { @@ -8551,6 +8802,7 @@ static const struct devlink_trap devlink_trap_generic[] = { DEVLINK_TRAP(PTP_GENERAL, CONTROL), DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL), DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL), + DEVLINK_TRAP(EARLY_DROP, DROP), }; #define DEVLINK_TRAP_GROUP(_id) \ @@ -8800,7 +9052,8 @@ static void devlink_trap_disable(struct devlink *devlink, if (WARN_ON_ONCE(!trap_item)) return; - devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP); + devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP, + NULL); trap_item->action = DEVLINK_TRAP_ACTION_DROP; } @@ -9341,7 +9594,7 @@ int devlink_compat_switch_id_get(struct net_device *dev, * any devlink lock as only permanent values are accessed. */ devlink_port = netdev_to_devlink_port(dev); - if (!devlink_port || !devlink_port->attrs.switch_port) + if (!devlink_port || !devlink_port->switch_port) return -EOPNOTSUPP; memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid)); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index bd7eba9066f8..51678a528f85 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -14,6 +14,20 @@ #include <net/sock.h> #include <net/fib_rules.h> #include <net/ip_tunnels.h> +#include <linux/indirect_call_wrapper.h> + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#ifdef CONFIG_IP_MULTIPLE_TABLES +#define INDIRECT_CALL_MT(f, f2, f1, ...) \ + INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__) +#endif +#elif defined(CONFIG_IP_MULTIPLE_TABLES) +#define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_MT(f, f2, f1, ...) f(__VA_ARGS__) +#endif static const struct fib_kuid_range fib_kuid_range_unset = { KUIDT_INIT(0), @@ -267,7 +281,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, uid_gt(fl->flowi_uid, rule->uid_range.end)) goto out; - ret = ops->match(rule, fl, flags); + ret = INDIRECT_CALL_MT(ops->match, + fib6_rule_match, + fib4_rule_match, + rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; } @@ -298,9 +315,15 @@ jumped: } else if (rule->action == FR_ACT_NOP) continue; else - err = ops->action(rule, fl, flags, arg); - - if (!err && ops->suppress && ops->suppress(rule, arg)) + err = INDIRECT_CALL_MT(ops->action, + fib6_rule_action, + fib4_rule_action, + rule, fl, flags, arg); + + if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress, + fib6_rule_suppress, + fib4_rule_suppress, + rule, arg)) continue; if (err != -EAGAIN) { diff --git a/net/core/filter.c b/net/core/filter.c index 82e1b5b06167..7124f0fe6974 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -47,6 +47,7 @@ #include <linux/seccomp.h> #include <linux/if_vlan.h> #include <linux/bpf.h> +#include <linux/btf.h> #include <net/sch_generic.h> #include <net/cls_cgroup.h> #include <net/dst_metadata.h> @@ -73,6 +74,31 @@ #include <net/lwtunnel.h> #include <net/ipv6_stubs.h> #include <net/bpf_sk_storage.h> +#include <net/transp_v6.h> +#include <linux/btf_ids.h> + +int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len) +{ + if (in_compat_syscall()) { + struct compat_sock_fprog f32; + + if (len != sizeof(f32)) + return -EINVAL; + if (copy_from_sockptr(&f32, src, sizeof(f32))) + return -EFAULT; + memset(dst, 0, sizeof(*dst)); + dst->len = f32.len; + dst->filter = compat_ptr(f32.filter); + } else { + if (len != sizeof(*dst)) + return -EINVAL; + if (copy_from_sockptr(dst, src, sizeof(*dst))) + return -EFAULT; + } + + return 0; +} +EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user); /** * sk_filter_trim_cap - run a packet through a socket filter @@ -3777,7 +3803,9 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -static int bpf_skb_output_btf_ids[5]; +BTF_ID_LIST(bpf_skb_output_btf_ids) +BTF_ID(struct, sk_buff) + const struct bpf_func_proto bpf_skb_output_proto = { .func = bpf_skb_event_output, .gpl_only = true, @@ -4171,7 +4199,9 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -static int bpf_xdp_output_btf_ids[5]; +BTF_ID_LIST(bpf_xdp_output_btf_ids) +BTF_ID(struct, xdp_buff) + const struct bpf_func_proto bpf_xdp_output_proto = { .func = bpf_xdp_event_output, .gpl_only = true, @@ -4289,10 +4319,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen, u32 flags) { char devname[IFNAMSIZ]; + int val, valbool; struct net *net; int ifindex; int ret = 0; - int val; if (!sk_fullsock(sk)) return -EINVAL; @@ -4303,6 +4333,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, if (optlen != sizeof(int) && optname != SO_BINDTODEVICE) return -EINVAL; val = *((int *)optval); + valbool = val ? 1 : 0; /* Only some socketops are supported */ switch (optname) { @@ -4361,6 +4392,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, } ret = sock_bindtoindex(sk, ifindex, false); break; + case SO_KEEPALIVE: + if (sk->sk_prot->keepalive) + sk->sk_prot->keepalive(sk, valbool); + sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); + break; default: ret = -EINVAL; } @@ -4421,6 +4457,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, ret = tcp_set_congestion_control(sk, name, false, reinit, true); } else { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); if (optlen != sizeof(int)) @@ -4449,6 +4486,33 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, else tp->save_syn = val; break; + case TCP_KEEPIDLE: + ret = tcp_sock_set_keepidle_locked(sk, val); + break; + case TCP_KEEPINTVL: + if (val < 1 || val > MAX_TCP_KEEPINTVL) + ret = -EINVAL; + else + tp->keepalive_intvl = val * HZ; + break; + case TCP_KEEPCNT: + if (val < 1 || val > MAX_TCP_KEEPCNT) + ret = -EINVAL; + else + tp->keepalive_probes = val; + break; + case TCP_SYNCNT: + if (val < 1 || val > MAX_TCP_SYNCNT) + ret = -EINVAL; + else + icsk->icsk_syn_retries = val; + break; + case TCP_USER_TIMEOUT: + if (val < 0) + ret = -EINVAL; + else + icsk->icsk_user_timeout = val; + break; default: ret = -EINVAL; } @@ -6123,6 +6187,7 @@ bool bpf_helper_changes_pkt_data(void *func) } const struct bpf_func_proto bpf_event_output_data_proto __weak; +const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak; static const struct bpf_func_proto * sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) @@ -6155,6 +6220,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_curr_proto; #endif + case BPF_FUNC_sk_storage_get: + return &bpf_sk_storage_get_cg_sock_proto; default: return bpf_base_func_proto(func_id); } @@ -6858,6 +6925,7 @@ static bool __sock_filter_check_attach_type(int off, case offsetof(struct bpf_sock, priority): switch (attach_type) { case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_INET_SOCK_RELEASE: goto full_access; default: return false; @@ -9187,6 +9255,189 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = { const struct bpf_prog_ops sk_reuseport_prog_ops = { }; + +DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled); +EXPORT_SYMBOL(bpf_sk_lookup_enabled); + +BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx, + struct sock *, sk, u64, flags) +{ + if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE | + BPF_SK_LOOKUP_F_NO_REUSEPORT))) + return -EINVAL; + if (unlikely(sk && sk_is_refcounted(sk))) + return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */ + if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED)) + return -ESOCKTNOSUPPORT; /* reject connected sockets */ + + /* Check if socket is suitable for packet L3/L4 protocol */ + if (sk && sk->sk_protocol != ctx->protocol) + return -EPROTOTYPE; + if (sk && sk->sk_family != ctx->family && + (sk->sk_family == AF_INET || ipv6_only_sock(sk))) + return -EAFNOSUPPORT; + + if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE)) + return -EEXIST; + + /* Select socket as lookup result */ + ctx->selected_sk = sk; + ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT; + return 0; +} + +static const struct bpf_func_proto bpf_sk_lookup_assign_proto = { + .func = bpf_sk_lookup_assign, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL, + .arg3_type = ARG_ANYTHING, +}; + +static const struct bpf_func_proto * +sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_event_output_data_proto; + case BPF_FUNC_sk_assign: + return &bpf_sk_lookup_assign_proto; + case BPF_FUNC_sk_release: + return &bpf_sk_release_proto; + default: + return bpf_base_func_proto(func_id); + } +} + +static bool sk_lookup_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= sizeof(struct bpf_sk_lookup)) + return false; + if (off % size != 0) + return false; + if (type != BPF_READ) + return false; + + switch (off) { + case offsetof(struct bpf_sk_lookup, sk): + info->reg_type = PTR_TO_SOCKET_OR_NULL; + return size == sizeof(__u64); + + case bpf_ctx_range(struct bpf_sk_lookup, family): + case bpf_ctx_range(struct bpf_sk_lookup, protocol): + case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4): + case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): + case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): + case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): + case bpf_ctx_range(struct bpf_sk_lookup, remote_port): + case bpf_ctx_range(struct bpf_sk_lookup, local_port): + bpf_ctx_record_field_size(info, sizeof(__u32)); + return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); + + default: + return false; + } +} + +static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + + switch (si->off) { + case offsetof(struct bpf_sk_lookup, sk): + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, + offsetof(struct bpf_sk_lookup_kern, selected_sk)); + break; + + case offsetof(struct bpf_sk_lookup, family): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + family, 2, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, protocol): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + protocol, 2, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, remote_ip4): + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + v4.saddr, 4, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, local_ip4): + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + v4.daddr, 4, target_size)); + break; + + case bpf_ctx_range_till(struct bpf_sk_lookup, + remote_ip6[0], remote_ip6[3]): { +#if IS_ENABLED(CONFIG_IPV6) + int off = si->off; + + off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]); + off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, + offsetof(struct bpf_sk_lookup_kern, v6.saddr)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + } + case bpf_ctx_range_till(struct bpf_sk_lookup, + local_ip6[0], local_ip6[3]): { +#if IS_ENABLED(CONFIG_IPV6) + int off = si->off; + + off -= offsetof(struct bpf_sk_lookup, local_ip6[0]); + off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, + offsetof(struct bpf_sk_lookup_kern, v6.daddr)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + } + case offsetof(struct bpf_sk_lookup, remote_port): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + sport, 2, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, local_port): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + dport, 2, target_size)); + break; + } + + return insn - insn_buf; +} + +const struct bpf_prog_ops sk_lookup_prog_ops = { +}; + +const struct bpf_verifier_ops sk_lookup_verifier_ops = { + .get_func_proto = sk_lookup_func_proto, + .is_valid_access = sk_lookup_is_valid_access, + .convert_ctx_access = sk_lookup_convert_ctx_access, +}; + #endif /* CONFIG_INET */ DEFINE_BPF_DISPATCHER(xdp) @@ -9195,3 +9446,132 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) { bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); } + +#ifdef CONFIG_DEBUG_INFO_BTF +BTF_ID_LIST_GLOBAL(btf_sock_ids) +#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type) +BTF_SOCK_TYPE_xxx +#undef BTF_SOCK_TYPE +#else +u32 btf_sock_ids[MAX_BTF_SOCK_TYPE]; +#endif + +static bool check_arg_btf_id(u32 btf_id, u32 arg) +{ + int i; + + /* only one argument, no need to check arg */ + for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) + if (btf_sock_ids[i] == btf_id) + return true; + return false; +} + +BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) +{ + /* tcp6_sock type is not generated in dwarf and hence btf, + * trigger an explicit type generation here. + */ + BTF_TYPE_EMIT(struct tcp6_sock); + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && + sk->sk_family == AF_INET6) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { + .func = bpf_skc_to_tcp6_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6], +}; + +BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) +{ + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = { + .func = bpf_skc_to_tcp_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP], +}; + +BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) +{ +#ifdef CONFIG_INET + if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) + return (unsigned long)sk; +#endif + +#if IS_BUILTIN(CONFIG_IPV6) + if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) + return (unsigned long)sk; +#endif + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = { + .func = bpf_skc_to_tcp_timewait_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW], +}; + +BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk) +{ +#ifdef CONFIG_INET + if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) + return (unsigned long)sk; +#endif + +#if IS_BUILTIN(CONFIG_IPV6) + if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) + return (unsigned long)sk; +#endif + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = { + .func = bpf_skc_to_tcp_request_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ], +}; + +BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk) +{ + /* udp6_sock type is not generated in dwarf and hence btf, + * trigger an explicit type generation here. + */ + BTF_TYPE_EMIT(struct udp6_sock); + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP && + sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = { + .func = bpf_skc_to_udp6_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6], +}; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 142a8824f0a8..29806eb765cf 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -383,6 +383,23 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); +void skb_flow_dissect_hash(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct flow_dissector_key_hash *key; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_HASH)) + return; + + key = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_HASH, + target_container); + + key->hash = skb_get_hash_raw(skb); +} +EXPORT_SYMBOL(skb_flow_dissect_hash); + static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, struct flow_dissector *flow_dissector, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 2076219b8ba5..d4474c812b64 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -430,7 +430,7 @@ EXPORT_SYMBOL(flow_indr_dev_unregister); static void flow_block_indr_init(struct flow_block_cb *flow_block, struct flow_block_offload *bo, - struct net_device *dev, void *data, + struct net_device *dev, struct Qdisc *sch, void *data, void *cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { @@ -438,6 +438,7 @@ static void flow_block_indr_init(struct flow_block_cb *flow_block, flow_block->indr.data = data; flow_block->indr.cb_priv = cb_priv; flow_block->indr.dev = dev; + flow_block->indr.sch = sch; flow_block->indr.cleanup = cleanup; } @@ -445,7 +446,8 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv), struct flow_block_offload *bo, - struct net_device *dev, void *data, + struct net_device *dev, + struct Qdisc *sch, void *data, void *indr_cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { @@ -455,7 +457,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, if (IS_ERR(block_cb)) goto out; - flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup); + flow_block_indr_init(block_cb, bo, dev, sch, data, indr_cb_priv, cleanup); list_add(&block_cb->indr.list, &flow_block_indr_list); out: @@ -463,7 +465,7 @@ out: } EXPORT_SYMBOL(flow_indr_block_cb_alloc); -int flow_indr_dev_setup_offload(struct net_device *dev, +int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)) @@ -472,7 +474,7 @@ int flow_indr_dev_setup_offload(struct net_device *dev, mutex_lock(&flow_indr_block_lock); list_for_each_entry(this, &flow_block_indr_dev_list, list) - this->cb(dev, this->cb_priv, type, bo, data, cleanup); + this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); mutex_unlock(&flow_indr_block_lock); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ef6b5a8f629c..8e39e28b0a8d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1783,6 +1783,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, [NDA_NH_ID] = { .type = NLA_U32 }, + [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED }, }; static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 85a4b0101f76..68e0682450c6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1000,6 +1000,16 @@ static size_t rtnl_prop_list_size(const struct net_device *dev) return size; } +static size_t rtnl_proto_down_size(const struct net_device *dev) +{ + size_t size = nla_total_size(1); + + if (dev->proto_down_reason) + size += nla_total_size(0) + nla_total_size(4); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1041,7 +1051,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(4) /* IFLA_NEW_IFINDEX */ - + nla_total_size(1) /* IFLA_PROTO_DOWN */ + + rtnl_proto_down_size(dev) /* proto down */ + nla_total_size(4) /* IFLA_TARGET_NETNSID */ + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */ + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */ @@ -1416,13 +1426,12 @@ static u32 rtnl_xdp_prog_skb(struct net_device *dev) static u32 rtnl_xdp_prog_drv(struct net_device *dev) { - return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG); + return dev_xdp_prog_id(dev, XDP_MODE_DRV); } static u32 rtnl_xdp_prog_hw(struct net_device *dev) { - return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, - XDP_QUERY_PROG_HW); + return dev_xdp_prog_id(dev, XDP_MODE_HW); } static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev, @@ -1658,6 +1667,35 @@ nest_cancel: return ret; } +static int rtnl_fill_proto_down(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *pr; + u32 preason; + + if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) + goto nla_put_failure; + + preason = dev->proto_down_reason; + if (!preason) + return 0; + + pr = nla_nest_start(skb, IFLA_PROTO_DOWN_REASON); + if (!pr) + return -EMSGSIZE; + + if (nla_put_u32(skb, IFLA_PROTO_DOWN_REASON_VALUE, preason)) { + nla_nest_cancel(skb, pr); + goto nla_put_failure; + } + + nla_nest_end(skb, pr); + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, @@ -1708,13 +1746,15 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_up_count) + atomic_read(&dev->carrier_down_count)) || - nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) || nla_put_u32(skb, IFLA_CARRIER_UP_COUNT, atomic_read(&dev->carrier_up_count)) || nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT, atomic_read(&dev->carrier_down_count))) goto nla_put_failure; + if (rtnl_fill_proto_down(skb, dev)) + goto nla_put_failure; + if (event != IFLA_EVENT_NONE) { if (nla_put_u32(skb, IFLA_EVENT, event)) goto nla_put_failure; @@ -1834,6 +1874,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_ALT_IFNAME] = { .type = NLA_STRING, .len = ALTIFNAMSIZ - 1 }, [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT }, + [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2483,6 +2524,67 @@ static int do_set_master(struct net_device *dev, int ifindex, return 0; } +static const struct nla_policy ifla_proto_down_reason_policy[IFLA_PROTO_DOWN_REASON_VALUE + 1] = { + [IFLA_PROTO_DOWN_REASON_MASK] = { .type = NLA_U32 }, + [IFLA_PROTO_DOWN_REASON_VALUE] = { .type = NLA_U32 }, +}; + +static int do_set_proto_down(struct net_device *dev, + struct nlattr *nl_proto_down, + struct nlattr *nl_proto_down_reason, + struct netlink_ext_ack *extack) +{ + struct nlattr *pdreason[IFLA_PROTO_DOWN_REASON_MAX + 1]; + const struct net_device_ops *ops = dev->netdev_ops; + unsigned long mask = 0; + u32 value; + bool proto_down; + int err; + + if (!ops->ndo_change_proto_down) { + NL_SET_ERR_MSG(extack, "Protodown not supported by device"); + return -EOPNOTSUPP; + } + + if (nl_proto_down_reason) { + err = nla_parse_nested_deprecated(pdreason, + IFLA_PROTO_DOWN_REASON_MAX, + nl_proto_down_reason, + ifla_proto_down_reason_policy, + NULL); + if (err < 0) + return err; + + if (!pdreason[IFLA_PROTO_DOWN_REASON_VALUE]) { + NL_SET_ERR_MSG(extack, "Invalid protodown reason value"); + return -EINVAL; + } + + value = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_VALUE]); + + if (pdreason[IFLA_PROTO_DOWN_REASON_MASK]) + mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]); + + dev_change_proto_down_reason(dev, mask, value); + } + + if (nl_proto_down) { + proto_down = nla_get_u8(nl_proto_down); + + /* Dont turn off protodown if there are active reasons */ + if (!proto_down && dev->proto_down_reason) { + NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons"); + return -EBUSY; + } + err = dev_change_proto_down(dev, + proto_down); + if (err) + return err; + } + + return 0; +} + #define DO_SETLINK_MODIFIED 0x01 /* notify flag means notify + modified. */ #define DO_SETLINK_NOTIFY 0x03 @@ -2771,9 +2873,9 @@ static int do_setlink(const struct sk_buff *skb, } err = 0; - if (tb[IFLA_PROTO_DOWN]) { - err = dev_change_proto_down(dev, - nla_get_u8(tb[IFLA_PROTO_DOWN])); + if (tb[IFLA_PROTO_DOWN] || tb[IFLA_PROTO_DOWN_REASON]) { + err = do_set_proto_down(dev, tb[IFLA_PROTO_DOWN], + tb[IFLA_PROTO_DOWN_REASON], extack); if (err) goto errout; status |= DO_SETLINK_NOTIFY; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b8afefe6f6b6..2828f6d5ba89 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3758,7 +3758,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int err = -ENOMEM; int i = 0; int pos; - int dummy; if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { @@ -3780,7 +3779,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, } __skb_push(head_skb, doffset); - proto = skb_network_protocol(head_skb, &dummy); + proto = skb_network_protocol(head_skb, NULL); if (unlikely(!proto)) return ERR_PTR(-EINVAL); @@ -4413,7 +4412,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) * at the moment even if they are anonymous). */ if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && - __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) + !__pskb_pull_tail(skb, __skb_pagelen(skb))) return -ENOMEM; /* Easy case. Most of packets will go this way. */ @@ -4692,7 +4691,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) { - skb = tcp_get_timestamping_opt_stats(sk); + skb = tcp_get_timestamping_opt_stats(sk, orig_skb); opt_stats = true; } else #endif diff --git a/net/core/sock.c b/net/core/sock.c index 8ccdcdaaa673..49cd5ffe673e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -113,6 +113,7 @@ #include <linux/static_key.h> #include <linux/memcontrol.h> #include <linux/prefetch.h> +#include <linux/compat.h> #include <linux/uaccess.h> @@ -360,7 +361,8 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) return sizeof(tv); } -static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval) +static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, + bool old_timeval) { struct __kernel_sock_timeval tv; @@ -370,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool if (optlen < sizeof(tv32)) return -EINVAL; - if (copy_from_user(&tv32, optval, sizeof(tv32))) + if (copy_from_sockptr(&tv32, optval, sizeof(tv32))) return -EFAULT; tv.tv_sec = tv32.tv_sec; tv.tv_usec = tv32.tv_usec; @@ -379,14 +381,14 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool if (optlen < sizeof(old_tv)) return -EINVAL; - if (copy_from_user(&old_tv, optval, sizeof(old_tv))) + if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv))) return -EFAULT; tv.tv_sec = old_tv.tv_sec; tv.tv_usec = old_tv.tv_usec; } else { if (optlen < sizeof(tv)) return -EINVAL; - if (copy_from_user(&tv, optval, sizeof(tv))) + if (copy_from_sockptr(&tv, optval, sizeof(tv))) return -EFAULT; } if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) @@ -608,8 +610,7 @@ int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk) } EXPORT_SYMBOL(sock_bindtoindex); -static int sock_setbindtodevice(struct sock *sk, char __user *optval, - int optlen) +static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES @@ -631,7 +632,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, memset(devname, 0, sizeof(devname)); ret = -EFAULT; - if (copy_from_user(devname, optval, optlen)) + if (copy_from_sockptr(devname, optval, optlen)) goto out; index = 0; @@ -695,15 +696,6 @@ out: return ret; } -static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit, - int valbool) -{ - if (valbool) - sock_set_flag(sk, bit); - else - sock_reset_flag(sk, bit); -} - bool sk_mc_loop(struct sock *sk) { if (dev_recursion_level()) @@ -834,7 +826,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf); */ int sock_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock_txtime sk_txtime; struct sock *sk = sock->sk; @@ -853,7 +845,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; valbool = val ? 1 : 0; @@ -966,7 +958,7 @@ set_sndbuf: ret = -EINVAL; /* 1003.1g */ break; } - if (copy_from_user(&ling, optval, sizeof(ling))) { + if (copy_from_sockptr(&ling, optval, sizeof(ling))) { ret = -EFAULT; break; } @@ -1060,60 +1052,52 @@ set_sndbuf: case SO_RCVTIMEO_OLD: case SO_RCVTIMEO_NEW: - ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD); + ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, + optlen, optname == SO_RCVTIMEO_OLD); break; case SO_SNDTIMEO_OLD: case SO_SNDTIMEO_NEW: - ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD); + ret = sock_set_timeout(&sk->sk_sndtimeo, optval, + optlen, optname == SO_SNDTIMEO_OLD); break; - case SO_ATTACH_FILTER: - ret = -EINVAL; - if (optlen == sizeof(struct sock_fprog)) { - struct sock_fprog fprog; - - ret = -EFAULT; - if (copy_from_user(&fprog, optval, sizeof(fprog))) - break; + case SO_ATTACH_FILTER: { + struct sock_fprog fprog; + ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); + if (!ret) ret = sk_attach_filter(&fprog, sk); - } break; - + } case SO_ATTACH_BPF: ret = -EINVAL; if (optlen == sizeof(u32)) { u32 ufd; ret = -EFAULT; - if (copy_from_user(&ufd, optval, sizeof(ufd))) + if (copy_from_sockptr(&ufd, optval, sizeof(ufd))) break; ret = sk_attach_bpf(ufd, sk); } break; - case SO_ATTACH_REUSEPORT_CBPF: - ret = -EINVAL; - if (optlen == sizeof(struct sock_fprog)) { - struct sock_fprog fprog; - - ret = -EFAULT; - if (copy_from_user(&fprog, optval, sizeof(fprog))) - break; + case SO_ATTACH_REUSEPORT_CBPF: { + struct sock_fprog fprog; + ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); + if (!ret) ret = sk_reuseport_attach_filter(&fprog, sk); - } break; - + } case SO_ATTACH_REUSEPORT_EBPF: ret = -EINVAL; if (optlen == sizeof(u32)) { u32 ufd; ret = -EFAULT; - if (copy_from_user(&ufd, optval, sizeof(ufd))) + if (copy_from_sockptr(&ufd, optval, sizeof(ufd))) break; ret = sk_reuseport_attach_bpf(ufd, sk); @@ -1193,7 +1177,7 @@ set_sndbuf: if (sizeof(ulval) != sizeof(val) && optlen >= sizeof(ulval) && - get_user(ulval, (unsigned long __user *)optval)) { + copy_from_sockptr(&ulval, optval, sizeof(ulval))) { ret = -EFAULT; break; } @@ -1236,7 +1220,7 @@ set_sndbuf: if (optlen != sizeof(struct sock_txtime)) { ret = -EINVAL; break; - } else if (copy_from_user(&sk_txtime, optval, + } else if (copy_from_sockptr(&sk_txtime, optval, sizeof(struct sock_txtime))) { ret = -EFAULT; break; @@ -2802,20 +2786,6 @@ int sock_no_shutdown(struct socket *sock, int how) } EXPORT_SYMBOL(sock_no_shutdown); -int sock_no_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) -{ - return -EOPNOTSUPP; -} -EXPORT_SYMBOL(sock_no_setsockopt); - -int sock_no_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - return -EOPNOTSUPP; -} -EXPORT_SYMBOL(sock_no_getsockopt); - int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; @@ -3243,20 +3213,6 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(sock_common_getsockopt); -#ifdef CONFIG_COMPAT -int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - - if (sk->sk_prot->compat_getsockopt != NULL) - return sk->sk_prot->compat_getsockopt(sk, level, optname, - optval, optlen); - return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); -} -EXPORT_SYMBOL(compat_sock_common_getsockopt); -#endif - int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -3276,7 +3232,7 @@ EXPORT_SYMBOL(sock_common_recvmsg); * Set socket options on an inet socket. */ int sock_common_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; @@ -3284,20 +3240,6 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(sock_common_setsockopt); -#ifdef CONFIG_COMPAT -int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - - if (sk->sk_prot->compat_setsockopt != NULL) - return sk->sk_prot->compat_setsockopt(sk, level, optname, - optval, optlen); - return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); -} -EXPORT_SYMBOL(compat_sock_common_setsockopt); -#endif - void sk_common_release(struct sock *sk) { if (sk->sk_prot->destroy) @@ -3596,6 +3538,7 @@ int sock_load_diag_module(int family, int protocol) #ifdef CONFIG_INET if (family == AF_INET && protocol != IPPROTO_RAW && + protocol < MAX_INET_PROTOS && !rcu_access_pointer(inet_protos[protocol])) return -ENOENT; #endif diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 0971f17e8e54..119f52a99dc1 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -681,6 +681,7 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = { .arg4_type = ARG_ANYTHING, }; +static int sock_map_btf_id; const struct bpf_map_ops sock_map_ops = { .map_alloc = sock_map_alloc, .map_free = sock_map_free, @@ -691,9 +692,11 @@ const struct bpf_map_ops sock_map_ops = { .map_lookup_elem = sock_map_lookup, .map_release_uref = sock_map_release_progs, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_stab", + .map_btf_id = &sock_map_btf_id, }; -struct bpf_htab_elem { +struct bpf_shtab_elem { struct rcu_head rcu; u32 hash; struct sock *sk; @@ -701,14 +704,14 @@ struct bpf_htab_elem { u8 key[]; }; -struct bpf_htab_bucket { +struct bpf_shtab_bucket { struct hlist_head head; raw_spinlock_t lock; }; -struct bpf_htab { +struct bpf_shtab { struct bpf_map map; - struct bpf_htab_bucket *buckets; + struct bpf_shtab_bucket *buckets; u32 buckets_num; u32 elem_size; struct sk_psock_progs progs; @@ -720,17 +723,17 @@ static inline u32 sock_hash_bucket_hash(const void *key, u32 len) return jhash(key, len, 0); } -static struct bpf_htab_bucket *sock_hash_select_bucket(struct bpf_htab *htab, - u32 hash) +static struct bpf_shtab_bucket *sock_hash_select_bucket(struct bpf_shtab *htab, + u32 hash) { return &htab->buckets[hash & (htab->buckets_num - 1)]; } -static struct bpf_htab_elem * +static struct bpf_shtab_elem * sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key, u32 key_size) { - struct bpf_htab_elem *elem; + struct bpf_shtab_elem *elem; hlist_for_each_entry_rcu(elem, head, node) { if (elem->hash == hash && @@ -743,10 +746,10 @@ sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key, static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 key_size = map->key_size, hash; - struct bpf_htab_bucket *bucket; - struct bpf_htab_elem *elem; + struct bpf_shtab_bucket *bucket; + struct bpf_shtab_elem *elem; WARN_ON_ONCE(!rcu_read_lock_held()); @@ -757,8 +760,8 @@ static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key) return elem ? elem->sk : NULL; } -static void sock_hash_free_elem(struct bpf_htab *htab, - struct bpf_htab_elem *elem) +static void sock_hash_free_elem(struct bpf_shtab *htab, + struct bpf_shtab_elem *elem) { atomic_dec(&htab->count); kfree_rcu(elem, rcu); @@ -767,9 +770,9 @@ static void sock_hash_free_elem(struct bpf_htab *htab, static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, void *link_raw) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct bpf_htab_elem *elem_probe, *elem = link_raw; - struct bpf_htab_bucket *bucket; + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + struct bpf_shtab_elem *elem_probe, *elem = link_raw; + struct bpf_shtab_bucket *bucket; WARN_ON_ONCE(!rcu_read_lock_held()); bucket = sock_hash_select_bucket(htab, elem->hash); @@ -791,10 +794,10 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, static int sock_hash_delete_elem(struct bpf_map *map, void *key) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 hash, key_size = map->key_size; - struct bpf_htab_bucket *bucket; - struct bpf_htab_elem *elem; + struct bpf_shtab_bucket *bucket; + struct bpf_shtab_elem *elem; int ret = -ENOENT; hash = sock_hash_bucket_hash(key, key_size); @@ -812,12 +815,12 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key) return ret; } -static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab, - void *key, u32 key_size, - u32 hash, struct sock *sk, - struct bpf_htab_elem *old) +static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab, + void *key, u32 key_size, + u32 hash, struct sock *sk, + struct bpf_shtab_elem *old) { - struct bpf_htab_elem *new; + struct bpf_shtab_elem *new; if (atomic_inc_return(&htab->count) > htab->map.max_entries) { if (!old) { @@ -841,10 +844,10 @@ static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab, static int sock_hash_update_common(struct bpf_map *map, void *key, struct sock *sk, u64 flags) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 key_size = map->key_size, hash; - struct bpf_htab_elem *elem, *elem_new; - struct bpf_htab_bucket *bucket; + struct bpf_shtab_elem *elem, *elem_new; + struct bpf_shtab_bucket *bucket; struct sk_psock_link *link; struct sk_psock *psock; int ret; @@ -954,8 +957,8 @@ out: static int sock_hash_get_next_key(struct bpf_map *map, void *key, void *key_next) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct bpf_htab_elem *elem, *elem_next; + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + struct bpf_shtab_elem *elem, *elem_next; u32 hash, key_size = map->key_size; struct hlist_head *head; int i = 0; @@ -969,7 +972,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key, goto find_first_elem; elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)), - struct bpf_htab_elem, node); + struct bpf_shtab_elem, node); if (elem_next) { memcpy(key_next, elem_next->key, key_size); return 0; @@ -981,7 +984,7 @@ find_first_elem: for (; i < htab->buckets_num; i++) { head = &sock_hash_select_bucket(htab, i)->head; elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), - struct bpf_htab_elem, node); + struct bpf_shtab_elem, node); if (elem_next) { memcpy(key_next, elem_next->key, key_size); return 0; @@ -993,7 +996,7 @@ find_first_elem: static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) { - struct bpf_htab *htab; + struct bpf_shtab *htab; int i, err; u64 cost; @@ -1015,15 +1018,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) bpf_map_init_from_attr(&htab->map, attr); htab->buckets_num = roundup_pow_of_two(htab->map.max_entries); - htab->elem_size = sizeof(struct bpf_htab_elem) + + htab->elem_size = sizeof(struct bpf_shtab_elem) + round_up(htab->map.key_size, 8); if (htab->buckets_num == 0 || - htab->buckets_num > U32_MAX / sizeof(struct bpf_htab_bucket)) { + htab->buckets_num > U32_MAX / sizeof(struct bpf_shtab_bucket)) { err = -EINVAL; goto free_htab; } - cost = (u64) htab->buckets_num * sizeof(struct bpf_htab_bucket) + + cost = (u64) htab->buckets_num * sizeof(struct bpf_shtab_bucket) + (u64) htab->elem_size * htab->map.max_entries; if (cost >= U32_MAX - PAGE_SIZE) { err = -EINVAL; @@ -1034,7 +1037,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) goto free_htab; htab->buckets = bpf_map_area_alloc(htab->buckets_num * - sizeof(struct bpf_htab_bucket), + sizeof(struct bpf_shtab_bucket), htab->map.numa_node); if (!htab->buckets) { bpf_map_charge_finish(&htab->map.memory); @@ -1055,10 +1058,10 @@ free_htab: static void sock_hash_free(struct bpf_map *map) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct bpf_htab_bucket *bucket; + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + struct bpf_shtab_bucket *bucket; struct hlist_head unlink_list; - struct bpf_htab_elem *elem; + struct bpf_shtab_elem *elem; struct hlist_node *node; int i; @@ -1134,7 +1137,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key) static void sock_hash_release_progs(struct bpf_map *map) { - psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs); + psock_progs_drop(&container_of(map, struct bpf_shtab, map)->progs); } BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops, @@ -1214,6 +1217,7 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = { .arg4_type = ARG_ANYTHING, }; +static int sock_hash_map_btf_id; const struct bpf_map_ops sock_hash_ops = { .map_alloc = sock_hash_alloc, .map_free = sock_hash_free, @@ -1224,6 +1228,8 @@ const struct bpf_map_ops sock_hash_ops = { .map_lookup_elem_sys_only = sock_hash_lookup_sys, .map_release_uref = sock_hash_release_progs, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_shtab", + .map_btf_id = &sock_hash_map_btf_id, }; static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) @@ -1232,7 +1238,7 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) case BPF_MAP_TYPE_SOCKMAP: return &container_of(map, struct bpf_stab, map)->progs; case BPF_MAP_TYPE_SOCKHASH: - return &container_of(map, struct bpf_htab, map)->progs; + return &container_of(map, struct bpf_shtab, map)->progs; default: break; } diff --git a/net/core/tso.c b/net/core/tso.c index d4d5c077ad72..4148f6d48953 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -6,18 +6,17 @@ #include <asm/unaligned.h> /* Calculate expected number of TX descriptors */ -int tso_count_descs(struct sk_buff *skb) +int tso_count_descs(const struct sk_buff *skb) { /* The Marvell Way */ return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags; } EXPORT_SYMBOL(tso_count_descs); -void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, +void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last) { - struct tcphdr *tcph; - int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int hdr_len = skb_transport_offset(skb) + tso->tlen; int mac_hdr_len = skb_network_offset(skb); memcpy(hdr, skb->data, hdr_len); @@ -30,23 +29,31 @@ void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, } else { struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len); - iph->payload_len = htons(size + tcp_hdrlen(skb)); + iph->payload_len = htons(size + tso->tlen); } - tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); - put_unaligned_be32(tso->tcp_seq, &tcph->seq); + hdr += skb_transport_offset(skb); + if (tso->tlen != sizeof(struct udphdr)) { + struct tcphdr *tcph = (struct tcphdr *)hdr; - if (!is_last) { - /* Clear all special flags for not last packet */ - tcph->psh = 0; - tcph->fin = 0; - tcph->rst = 0; + put_unaligned_be32(tso->tcp_seq, &tcph->seq); + + if (!is_last) { + /* Clear all special flags for not last packet */ + tcph->psh = 0; + tcph->fin = 0; + tcph->rst = 0; + } + } else { + struct udphdr *uh = (struct udphdr *)hdr; + + uh->len = htons(sizeof(*uh) + size); } } EXPORT_SYMBOL(tso_build_hdr); -void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size) +void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size) { - tso->tcp_seq += size; + tso->tcp_seq += size; /* not worth avoiding this operation for UDP */ tso->size -= size; tso->data += size; @@ -62,12 +69,14 @@ void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size) } EXPORT_SYMBOL(tso_build_data); -void tso_start(struct sk_buff *skb, struct tso_t *tso) +int tso_start(struct sk_buff *skb, struct tso_t *tso) { - int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr); + int hdr_len = skb_transport_offset(skb) + tlen; + tso->tlen = tlen; tso->ip_id = ntohs(ip_hdr(skb)->id); - tso->tcp_seq = ntohl(tcp_hdr(skb)->seq); + tso->tcp_seq = (tlen != sizeof(struct udphdr)) ? ntohl(tcp_hdr(skb)->seq) : 0; tso->next_frag_idx = 0; tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6); @@ -83,5 +92,6 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso) tso->data = skb_frag_address(frag); tso->next_frag_idx++; } + return hdr_len; } EXPORT_SYMBOL(tso_start); diff --git a/net/core/xdp.c b/net/core/xdp.c index 3c45f99e26d5..48aba933a5a8 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -400,15 +400,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem) } EXPORT_SYMBOL_GPL(__xdp_release_frame); -int xdp_attachment_query(struct xdp_attachment_info *info, - struct netdev_bpf *bpf) -{ - bpf->prog_id = info->prog ? info->prog->aux->id : 0; - bpf->prog_flags = info->prog ? info->flags : 0; - return 0; -} -EXPORT_SYMBOL_GPL(xdp_attachment_query); - bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { |