diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/datagram.c | 2 | ||||
-rw-r--r-- | net/core/dev.c | 239 | ||||
-rw-r--r-- | net/core/dst.c | 16 | ||||
-rw-r--r-- | net/core/ethtool.c | 16 | ||||
-rw-r--r-- | net/core/fib_rules.c | 9 | ||||
-rw-r--r-- | net/core/filter.c | 378 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 100 | ||||
-rw-r--r-- | net/core/lwt_bpf.c | 2 | ||||
-rw-r--r-- | net/core/neighbour.c | 18 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 17 | ||||
-rw-r--r-- | net/core/net-traces.c | 3 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 355 | ||||
-rw-r--r-- | net/core/skbuff.c | 57 | ||||
-rw-r--r-- | net/core/sock.c | 2 |
14 files changed, 908 insertions, 306 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index 3964c108b169..522873ed120b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -189,7 +189,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, } if (!skb->len) { skb = skb_set_peeked(skb); - if (unlikely(IS_ERR(skb))) { + if (IS_ERR(skb)) { *err = PTR_ERR(skb); return NULL; } diff --git a/net/core/dev.c b/net/core/dev.c index 11596a302a26..1423cf4d695c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -145,6 +145,7 @@ #include <linux/crash_dump.h> #include <linux/sctp.h> #include <net/udp_tunnel.h> +#include <linux/net_namespace.h> #include "net-sysfs.h" @@ -162,7 +163,6 @@ static struct list_head offload_base __read_mostly; static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_info(unsigned long val, - struct net_device *dev, struct netdev_notifier_info *info); static struct napi_struct *napi_by_id(unsigned int napi_id); @@ -188,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id); DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +static DEFINE_MUTEX(ifalias_mutex); + /* protects napi_hash addition/deletion and napi_gen_id */ static DEFINE_SPINLOCK(napi_hash_lock); @@ -1265,29 +1267,53 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { - char *new_ifalias; - - ASSERT_RTNL(); + struct dev_ifalias *new_alias = NULL; if (len >= IFALIASZ) return -EINVAL; - if (!len) { - kfree(dev->ifalias); - dev->ifalias = NULL; - return 0; + if (len) { + new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL); + if (!new_alias) + return -ENOMEM; + + memcpy(new_alias->ifalias, alias, len); + new_alias->ifalias[len] = 0; } - new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!new_ifalias) - return -ENOMEM; - dev->ifalias = new_ifalias; - memcpy(dev->ifalias, alias, len); - dev->ifalias[len] = 0; + mutex_lock(&ifalias_mutex); + rcu_swap_protected(dev->ifalias, new_alias, + mutex_is_locked(&ifalias_mutex)); + mutex_unlock(&ifalias_mutex); + + if (new_alias) + kfree_rcu(new_alias, rcuhead); return len; } +/** + * dev_get_alias - get ifalias of a device + * @dev: device + * @name: buffer to store name of ifalias + * @len: size of buffer + * + * get ifalias for a device. Caller must make sure dev cannot go + * away, e.g. rcu read lock or own a reference count to device. + */ +int dev_get_alias(const struct net_device *dev, char *name, size_t len) +{ + const struct dev_ifalias *alias; + int ret = 0; + + rcu_read_lock(); + alias = rcu_dereference(dev->ifalias); + if (alias) + ret = snprintf(name, len, "%s", alias->ifalias); + rcu_read_unlock(); + + return ret; +} /** * netdev_features_change - device changes features @@ -1312,10 +1338,11 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - struct netdev_notifier_change_info change_info; + struct netdev_notifier_change_info change_info = { + .info.dev = dev, + }; - change_info.flags_changed = 0; - call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); } @@ -1536,9 +1563,10 @@ EXPORT_SYMBOL(dev_disable_lro); static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, struct net_device *dev) { - struct netdev_notifier_info info; + struct netdev_notifier_info info = { + .dev = dev, + }; - netdev_notifier_info_init(&info, dev); return nb->notifier_call(nb, val, &info); } @@ -1663,11 +1691,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); */ static int call_netdevice_notifiers_info(unsigned long val, - struct net_device *dev, struct netdev_notifier_info *info) { ASSERT_RTNL(); - netdev_notifier_info_init(info, dev); return raw_notifier_call_chain(&netdev_chain, val, info); } @@ -1682,9 +1708,11 @@ static int call_netdevice_notifiers_info(unsigned long val, int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - struct netdev_notifier_info info; + struct netdev_notifier_info info = { + .dev = dev, + }; - return call_netdevice_notifiers_info(val, dev, &info); + return call_netdevice_notifiers_info(val, &info); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -2012,6 +2040,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) return 0; } +EXPORT_SYMBOL(netdev_txq_to_tc); #ifdef CONFIG_XPS static DEFINE_MUTEX(xps_map_mutex); @@ -3245,22 +3274,22 @@ EXPORT_SYMBOL(dev_loopback_xmit); static struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { - struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list); + struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress); struct tcf_result cl_res; - if (!cl) + if (!miniq) return skb; /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ - qdisc_bstats_cpu_update(cl->q, skb); + mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_cpu_drop(cl->q); + mini_qdisc_qstats_cpu_drop(miniq); *ret = NET_XMIT_DROP; kfree_skb(skb); return NULL; @@ -3864,8 +3893,8 @@ drop: static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct bpf_prog *xdp_prog) { + u32 metalen, act = XDP_DROP; struct xdp_buff xdp; - u32 act = XDP_DROP; void *orig_data; int hlen, off; u32 mac_len; @@ -3876,8 +3905,25 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, if (skb_cloned(skb)) return XDP_PASS; - if (skb_linearize(skb)) - goto do_drop; + /* XDP packets must be linear and must have sufficient headroom + * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also + * native XDP provides, thus we need to do it here as well. + */ + if (skb_is_nonlinear(skb) || + skb_headroom(skb) < XDP_PACKET_HEADROOM) { + int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); + int troom = skb->tail + skb->data_len - skb->end; + + /* In case we have to go down the path and also linearize, + * then lets do the pskb_expand_head() work just once here. + */ + if (pskb_expand_head(skb, + hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, + troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) + goto do_drop; + if (troom > 0 && __skb_linearize(skb)) + goto do_drop; + } /* The XDP program wants to see the packet starting at the MAC * header. @@ -3885,6 +3931,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, mac_len = skb->data - skb_mac_header(skb); hlen = skb_headlen(skb) + mac_len; xdp.data = skb->data - mac_len; + xdp.data_meta = xdp.data; xdp.data_end = xdp.data + hlen; xdp.data_hard_start = skb->data - skb_headroom(skb); orig_data = xdp.data; @@ -3902,10 +3949,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, case XDP_REDIRECT: case XDP_TX: __skb_push(skb, mac_len); - /* fall through */ + break; case XDP_PASS: + metalen = xdp.data - xdp.data_meta; + if (metalen) + skb_metadata_set(skb, metalen); break; - default: bpf_warn_invalid_xdp_action(act); /* fall through */ @@ -4140,7 +4189,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); + struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress); struct tcf_result cl_res; /* If there's at least one ingress present somewhere (so @@ -4148,8 +4197,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, * that are not configured with an ingress qdisc will bail * out here. */ - if (!cl) + if (!miniq) return skb; + if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; @@ -4157,15 +4207,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, qdisc_skb_cb(skb)->pkt_len = skb->len; skb->tc_at_ingress = 1; - qdisc_bstats_cpu_update(cl->q, skb); + mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_cpu_drop(cl->q); + mini_qdisc_qstats_cpu_drop(miniq); kfree_skb(skb); return NULL; case TC_ACT_STOLEN: @@ -4443,6 +4493,33 @@ out: return ret; } +/** + * netif_receive_skb_core - special purpose version of netif_receive_skb + * @skb: buffer to process + * + * More direct receive version of netif_receive_skb(). It should + * only be used by callers that have a need to skip RPS and Generic XDP. + * Caller must also take care of handling if (page_is_)pfmemalloc. + * + * This function may only be called from softirq context and interrupts + * should be enabled. + * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ +int netif_receive_skb_core(struct sk_buff *skb) +{ + int ret; + + rcu_read_lock(); + ret = __netif_receive_skb_core(skb, false); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(netif_receive_skb_core); + static int __netif_receive_skb(struct sk_buff *skb) { int ret; @@ -4695,6 +4772,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; diffs |= skb_metadata_dst_cmp(p, skb); + diffs |= skb_metadata_differs(p, skb); if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), skb_mac_header(skb)); @@ -6228,9 +6306,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *upper_priv, void *upper_info) -{ - struct netdev_notifier_changeupper_info changeupper_info; + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack) +{ + struct netdev_notifier_changeupper_info changeupper_info = { + .info = { + .dev = dev, + .extack = extack, + }, + .upper_dev = upper_dev, + .master = master, + .linking = true, + .upper_info = upper_info, + }; int ret = 0; ASSERT_RTNL(); @@ -6248,12 +6336,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; - changeupper_info.upper_dev = upper_dev; - changeupper_info.master = master; - changeupper_info.linking = true; - changeupper_info.upper_info = upper_info; - - ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) @@ -6264,7 +6347,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) @@ -6289,9 +6372,11 @@ rollback: * returns zero. */ int netdev_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + struct netlink_ext_ack *extack) { - return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); + return __netdev_upper_dev_link(dev, upper_dev, false, + NULL, NULL, extack); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -6310,10 +6395,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link); */ int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv, void *upper_info) + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack) { return __netdev_upper_dev_link(dev, upper_dev, true, - upper_priv, upper_info); + upper_priv, upper_info, extack); } EXPORT_SYMBOL(netdev_master_upper_dev_link); @@ -6328,20 +6414,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { - struct netdev_notifier_changeupper_info changeupper_info; + struct netdev_notifier_changeupper_info changeupper_info = { + .info = { + .dev = dev, + }, + .upper_dev = upper_dev, + .linking = false, + }; ASSERT_RTNL(); - changeupper_info.upper_dev = upper_dev; changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; - changeupper_info.linking = false; - call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -6357,11 +6447,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink); void netdev_bonding_info_change(struct net_device *dev, struct netdev_bonding_info *bonding_info) { - struct netdev_notifier_bonding_info info; + struct netdev_notifier_bonding_info info = { + .info.dev = dev, + }; memcpy(&info.bonding_info, bonding_info, sizeof(struct netdev_bonding_info)); - call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, + call_netdevice_notifiers_info(NETDEV_BONDING_INFO, &info.info); } EXPORT_SYMBOL(netdev_bonding_info_change); @@ -6487,11 +6579,13 @@ EXPORT_SYMBOL(dev_get_nest_level); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info) { - struct netdev_notifier_changelowerstate_info changelowerstate_info; + struct netdev_notifier_changelowerstate_info changelowerstate_info = { + .info.dev = lower_dev, + }; ASSERT_RTNL(); changelowerstate_info.lower_state_info = lower_state_info; - call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, + call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, &changelowerstate_info.info); } EXPORT_SYMBOL(netdev_lower_state_changed); @@ -6782,11 +6876,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, if (dev->flags & IFF_UP && (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { - struct netdev_notifier_change_info change_info; + struct netdev_notifier_change_info change_info = { + .info = { + .dev = dev, + }, + .flags_changed = changes, + }; - change_info.flags_changed = changes; - call_netdevice_notifiers_info(NETDEV_CHANGE, dev, - &change_info.info); + call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); } } @@ -7157,7 +7254,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, - GFP_KERNEL); + GFP_KERNEL, NULL); /* * Flush the unicast and multicast chains @@ -7994,7 +8091,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned int txqs, unsigned int rxqs) { struct net_device *dev; - size_t alloc_size; + unsigned int alloc_size; struct net_device *p; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -8244,7 +8341,7 @@ EXPORT_SYMBOL(unregister_netdev); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - int err; + int err, new_nsid; ASSERT_RTNL(); @@ -8300,7 +8397,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); + if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) + new_nsid = peernet2id_alloc(dev_net(dev), net); + else + new_nsid = peernet2id(dev_net(dev), net); + rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid); /* * Flush the unicast and multicast chains diff --git a/net/core/dst.c b/net/core/dst.c index a6c47da7d0f8..662a2d4a3d19 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -322,3 +322,19 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags) return md_dst; } EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); + +void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst) +{ +#ifdef CONFIG_DST_CACHE + int cpu; + + for_each_possible_cpu(cpu) { + struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu); + + if (one_md_dst->type == METADATA_IP_TUNNEL) + dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache); + } +#endif + free_percpu(md_dst); +} +EXPORT_SYMBOL_GPL(metadata_dst_free_percpu); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9a9a3d77e327..f8fcf450a36e 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -403,6 +403,22 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } +/* Given two link masks, AND them together and save the result in dst. */ +void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst, + struct ethtool_link_ksettings *src) +{ + unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS); + unsigned int idx = 0; + + for (; idx < size; idx++) { + dst->link_modes.supported[idx] &= + src->link_modes.supported[idx]; + dst->link_modes.advertising[idx] &= + src->link_modes.advertising[idx]; + } +} +EXPORT_SYMBOL(ethtool_intersect_link_masks); + void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a6d97c1d810..fafd0a41e3f7 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -314,10 +314,12 @@ static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, static int call_fib_rule_notifiers(struct net *net, enum fib_event_type event_type, struct fib_rule *rule, - struct fib_rules_ops *ops) + struct fib_rules_ops *ops, + struct netlink_ext_ack *extack) { struct fib_rule_notifier_info info = { .info.family = ops->family, + .info.extack = extack, .rule = rule, }; @@ -609,7 +611,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); - call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); @@ -749,7 +751,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } } - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, + NULL); notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); diff --git a/net/core/filter.c b/net/core/filter.c index 6ae94f825f72..a0112168d6f9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -43,6 +43,7 @@ #include <linux/timer.h> #include <linux/uaccess.h> #include <asm/unaligned.h> +#include <asm/cmpxchg.h> #include <linux/filter.h> #include <linux/ratelimit.h> #include <linux/seccomp.h> @@ -1406,7 +1407,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, { int err = __bpf_try_make_writable(skb, write_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return err; } @@ -1968,7 +1969,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, ret = skb_vlan_push(skb, vlan_proto, vlan_tci); bpf_pull_mac_rcsum(skb); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -1990,7 +1991,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) ret = skb_vlan_pop(skb); bpf_pull_mac_rcsum(skb); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2184,7 +2185,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, * need to be verified first. */ ret = bpf_skb_proto_xlat(skb, proto); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2309,7 +2310,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) : bpf_skb_net_grow(skb, len_diff_abs); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2400,7 +2401,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, skb_gso_reset(skb); } - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2440,7 +2441,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, skb_reset_mac_header(skb); } - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return 0; } @@ -2453,14 +2454,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = { .arg3_type = ARG_ANYTHING, }; +static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) +{ + return xdp_data_meta_unsupported(xdp) ? 0 : + xdp->data - xdp->data_meta; +} + BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) { + unsigned long metalen = xdp_get_metalen(xdp); + void *data_start = xdp->data_hard_start + metalen; void *data = xdp->data + offset; - if (unlikely(data < xdp->data_hard_start || + if (unlikely(data < data_start || data > xdp->data_end - ETH_HLEN)) return -EINVAL; + if (metalen) + memmove(xdp->data_meta + offset, + xdp->data_meta, metalen); + xdp->data_meta += offset; xdp->data = data; return 0; @@ -2474,6 +2487,33 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) +{ + void *meta = xdp->data_meta + offset; + unsigned long metalen = xdp->data - meta; + + if (xdp_data_meta_unsupported(xdp)) + return -ENOTSUPP; + if (unlikely(meta < xdp->data_hard_start || + meta > xdp->data)) + return -EINVAL; + if (unlikely((metalen & (sizeof(__u32) - 1)) || + (metalen > 32))) + return -EACCES; + + xdp->data_meta = meta; + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { + .func = bpf_xdp_adjust_meta, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static int __bpf_tx_xdp(struct net_device *dev, struct bpf_map *map, struct xdp_buff *xdp, @@ -2488,10 +2528,36 @@ static int __bpf_tx_xdp(struct net_device *dev, err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); if (err) return err; - if (map) + dev->netdev_ops->ndo_xdp_flush(dev); + return 0; +} + +static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, + struct bpf_map *map, + struct xdp_buff *xdp, + u32 index) +{ + int err; + + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + struct net_device *dev = fwd; + + if (!dev->netdev_ops->ndo_xdp_xmit) + return -EOPNOTSUPP; + + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + if (err) + return err; __dev_map_insert_ctx(map, index); - else - dev->netdev_ops->ndo_xdp_flush(dev); + + } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) { + struct bpf_cpu_map_entry *rcpu = fwd; + + err = cpu_map_enqueue(rcpu, xdp, dev_rx); + if (err) + return err; + __cpu_map_insert_ctx(map, index); + } return 0; } @@ -2501,11 +2567,33 @@ void xdp_do_flush_map(void) struct bpf_map *map = ri->map_to_flush; ri->map_to_flush = NULL; - if (map) - __dev_map_flush(map); + if (map) { + switch (map->map_type) { + case BPF_MAP_TYPE_DEVMAP: + __dev_map_flush(map); + break; + case BPF_MAP_TYPE_CPUMAP: + __cpu_map_flush(map); + break; + default: + break; + } + } } EXPORT_SYMBOL_GPL(xdp_do_flush_map); +static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) +{ + switch (map->map_type) { + case BPF_MAP_TYPE_DEVMAP: + return __dev_map_lookup_elem(map, index); + case BPF_MAP_TYPE_CPUMAP: + return __cpu_map_lookup_elem(map, index); + default: + return NULL; + } +} + static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog, unsigned long aux) { @@ -2518,8 +2606,8 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, struct redirect_info *ri = this_cpu_ptr(&redirect_info); unsigned long map_owner = ri->map_owner; struct bpf_map *map = ri->map; - struct net_device *fwd = NULL; u32 index = ri->ifindex; + void *fwd = NULL; int err; ri->ifindex = 0; @@ -2532,7 +2620,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, goto err; } - fwd = __dev_map_lookup_elem(map, index); + fwd = __xdp_map_lookup_elem(map, index); if (!fwd) { err = -EINVAL; goto err; @@ -2540,7 +2628,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, if (ri->map_to_flush && ri->map_to_flush != map) xdp_do_flush_map(); - err = __bpf_tx_xdp(fwd, map, xdp, index); + err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index); if (unlikely(err)) goto err; @@ -2582,54 +2670,88 @@ err: } EXPORT_SYMBOL_GPL(xdp_do_redirect); -int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog) +static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) +{ + unsigned int len; + + if (unlikely(!(fwd->flags & IFF_UP))) + return -ENETDOWN; + + len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; + if (skb->len > len) + return -EMSGSIZE; + + return 0; +} + +int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, + struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); unsigned long map_owner = ri->map_owner; struct bpf_map *map = ri->map; struct net_device *fwd = NULL; u32 index = ri->ifindex; - unsigned int len; int err = 0; ri->ifindex = 0; ri->map = NULL; ri->map_owner = 0; - if (map) { - if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { - err = -EFAULT; - map = NULL; - goto err; - } - fwd = __dev_map_lookup_elem(map, index); - } else { - fwd = dev_get_by_index_rcu(dev_net(dev), index); + if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { + err = -EFAULT; + map = NULL; + goto err; } + fwd = __xdp_map_lookup_elem(map, index); if (unlikely(!fwd)) { err = -EINVAL; goto err; } - if (unlikely(!(fwd->flags & IFF_UP))) { - err = -ENETDOWN; + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) + goto err; + skb->dev = fwd; + } else { + /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ + err = -EBADRQC; goto err; } - len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; - if (skb->len > len) { - err = -EMSGSIZE; + _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); + return 0; +err: + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); + return err; +} + +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + u32 index = ri->ifindex; + struct net_device *fwd; + int err = 0; + + if (ri->map) + return xdp_do_generic_redirect_map(dev, skb, xdp_prog); + + ri->ifindex = 0; + fwd = dev_get_by_index_rcu(dev_net(dev), index); + if (unlikely(!fwd)) { + err = -EINVAL; goto err; } + if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) + goto err; + skb->dev = fwd; - map ? _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index) - : _trace_xdp_redirect(dev, xdp_prog, index); + _trace_xdp_redirect(dev, xdp_prog, index); return 0; err: - map ? _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err) - : _trace_xdp_redirect_err(dev, xdp_prog, index, err); + _trace_xdp_redirect_err(dev, xdp_prog, index, err); return err; } EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); @@ -2698,7 +2820,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_clone_redirect || func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || - func == bpf_xdp_adjust_head) + func == bpf_xdp_adjust_head || + func == bpf_xdp_adjust_meta) return true; return false; @@ -2949,14 +3072,15 @@ static const struct bpf_func_proto * bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) { if (!md_dst) { - /* Race is not possible, since it's called from verifier - * that is holding verifier mutex. - */ - md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, - METADATA_IP_TUNNEL, - GFP_KERNEL); - if (!md_dst) + struct metadata_dst __percpu *tmp; + + tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, + METADATA_IP_TUNNEL, + GFP_KERNEL); + if (!tmp) return NULL; + if (cmpxchg(&md_dst, NULL, tmp)) + metadata_dst_free_percpu(tmp); } switch (which) { @@ -3151,7 +3275,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, static const struct bpf_func_proto bpf_setsockopt_proto = { .func = bpf_setsockopt, - .gpl_only = true, + .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, @@ -3160,6 +3284,47 @@ static const struct bpf_func_proto bpf_setsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, + int, level, int, optname, char *, optval, int, optlen) +{ + struct sock *sk = bpf_sock->sk; + + if (!sk_fullsock(sk)) + goto err_clear; + +#ifdef CONFIG_INET + if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { + if (optname == TCP_CONGESTION) { + struct inet_connection_sock *icsk = inet_csk(sk); + + if (!icsk->icsk_ca_ops || optlen <= 1) + goto err_clear; + strncpy(optval, icsk->icsk_ca_ops->name, optlen); + optval[optlen - 1] = 0; + } else { + goto err_clear; + } + } else { + goto err_clear; + } + return 0; +#endif +err_clear: + memset(optval, 0, optlen); + return -EINVAL; +} + +static const struct bpf_func_proto bpf_getsockopt_proto = { + .func = bpf_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -3294,6 +3459,8 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; + case BPF_FUNC_xdp_adjust_meta: + return &bpf_xdp_adjust_meta_proto; case BPF_FUNC_redirect: return &bpf_xdp_redirect_proto; case BPF_FUNC_redirect_map: @@ -3336,6 +3503,8 @@ static const struct bpf_func_proto * switch (func_id) { case BPF_FUNC_setsockopt: return &bpf_setsockopt_proto; + case BPF_FUNC_getsockopt: + return &bpf_getsockopt_proto; case BPF_FUNC_sock_map_update: return &bpf_sock_map_update_proto; default: @@ -3424,6 +3593,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): if (size != size_default) return false; @@ -3450,6 +3620,7 @@ static bool sk_filter_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; @@ -3474,6 +3645,7 @@ static bool lwt_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, data_meta): return false; } @@ -3592,6 +3764,9 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; + case bpf_ctx_range(struct __sk_buff, data_meta): + info->reg_type = PTR_TO_PACKET_META; + break; case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; @@ -3602,6 +3777,25 @@ static bool tc_cls_act_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, info); } +static bool +tc_cls_act_is_valid_access_analyzer(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case offsetof(struct sk_buff, len): + return true; + case offsetof(struct sk_buff, data): + info->reg_type = PTR_TO_PACKET; + return true; + case offsetof(struct sk_buff, cb) + + offsetof(struct bpf_skb_data_end, data_end): + info->reg_type = PTR_TO_PACKET_END; + return true; + } + return false; +} + static bool __is_valid_xdp_access(int off, int size) { if (off < 0 || off >= sizeof(struct xdp_md)) @@ -3625,6 +3819,9 @@ static bool xdp_is_valid_access(int off, int size, case offsetof(struct xdp_md, data): info->reg_type = PTR_TO_PACKET; break; + case offsetof(struct xdp_md, data_meta): + info->reg_type = PTR_TO_PACKET_META; + break; case offsetof(struct xdp_md, data_end): info->reg_type = PTR_TO_PACKET_END; break; @@ -3633,6 +3830,21 @@ static bool xdp_is_valid_access(int off, int size, return __is_valid_xdp_access(off, size); } +static bool xdp_is_valid_access_analyzer(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case offsetof(struct xdp_buff, data): + info->reg_type = PTR_TO_PACKET; + return true; + case offsetof(struct xdp_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + return true; + } + return false; +} + void bpf_warn_invalid_xdp_action(u32 act) { const u32 act_max = XDP_REDIRECT; @@ -3683,6 +3895,12 @@ static bool sk_skb_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + return false; + } + if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, tc_index): @@ -3695,7 +3913,6 @@ static bool sk_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, mark): - case bpf_ctx_range(struct __sk_buff, tc_classid): return false; case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; @@ -3853,6 +4070,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, offsetof(struct sk_buff, data)); break; + case offsetof(struct __sk_buff, data_meta): + off = si->off; + off -= offsetof(struct __sk_buff, data_meta); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct bpf_skb_data_end, data_meta); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, + si->src_reg, off); + break; + case offsetof(struct __sk_buff, data_end): off = si->off; off -= offsetof(struct __sk_buff, data_end); @@ -4101,6 +4327,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data)); break; + case offsetof(struct xdp_md, data_meta): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta), + si->dst_reg, si->src_reg, + offsetof(struct xdp_buff, data_meta)); + break; case offsetof(struct xdp_md, data_end): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), si->dst_reg, si->src_reg, @@ -4269,68 +4500,103 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -const struct bpf_verifier_ops sk_filter_prog_ops = { +const struct bpf_verifier_ops sk_filter_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; -const struct bpf_verifier_ops tc_cls_act_prog_ops = { +const struct bpf_prog_ops sk_filter_prog_ops = { +}; + +const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, +}; + +const struct bpf_verifier_ops tc_cls_act_analyzer_ops = { + .is_valid_access = tc_cls_act_is_valid_access_analyzer, +}; + +const struct bpf_prog_ops tc_cls_act_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops xdp_prog_ops = { +const struct bpf_verifier_ops xdp_verifier_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, +}; + +const struct bpf_verifier_ops xdp_analyzer_ops = { + .is_valid_access = xdp_is_valid_access_analyzer, +}; + +const struct bpf_prog_ops xdp_prog_ops = { .test_run = bpf_prog_test_run_xdp, }; -const struct bpf_verifier_ops cg_skb_prog_ops = { +const struct bpf_verifier_ops cg_skb_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, +}; + +const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops lwt_inout_prog_ops = { +const struct bpf_verifier_ops lwt_inout_verifier_ops = { .get_func_proto = lwt_inout_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, +}; + +const struct bpf_prog_ops lwt_inout_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops lwt_xmit_prog_ops = { +const struct bpf_verifier_ops lwt_xmit_verifier_ops = { .get_func_proto = lwt_xmit_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, +}; + +const struct bpf_prog_ops lwt_xmit_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops cg_sock_prog_ops = { +const struct bpf_verifier_ops cg_sock_verifier_ops = { .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = sock_filter_convert_ctx_access, }; -const struct bpf_verifier_ops sock_ops_prog_ops = { +const struct bpf_prog_ops cg_sock_prog_ops = { +}; + +const struct bpf_verifier_ops sock_ops_verifier_ops = { .get_func_proto = sock_ops_func_proto, .is_valid_access = sock_ops_is_valid_access, .convert_ctx_access = sock_ops_convert_ctx_access, }; -const struct bpf_verifier_ops sk_skb_prog_ops = { +const struct bpf_prog_ops sock_ops_prog_ops = { +}; + +const struct bpf_verifier_ops sk_skb_verifier_ops = { .get_func_proto = sk_skb_func_proto, .is_valid_access = sk_skb_is_valid_access, .convert_ctx_access = sk_skb_convert_ctx_access, .gen_prologue = sk_skb_prologue, }; +const struct bpf_prog_ops sk_skb_prog_ops = { +}; + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0a977373d003..1f5caafb4492 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -5,6 +5,7 @@ #include <linux/ipv6.h> #include <linux/if_vlan.h> #include <net/dsa.h> +#include <net/dst_metadata.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/gre.h> @@ -115,6 +116,102 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, } EXPORT_SYMBOL(__skb_flow_get_ports); +static void +skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct flow_dissector_key_control *ctrl; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) + return; + + ctrl = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + target_container); + ctrl->addr_type = type; +} + +static void +__skb_flow_dissect_tunnel_info(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct ip_tunnel_info *info; + struct ip_tunnel_key *key; + + /* A quick check to see if there might be something to do. */ + if (!dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS)) + return; + + info = skb_tunnel_info(skb); + if (!info) + return; + + key = &info->key; + + switch (ip_tunnel_info_af(info)) { + case AF_INET: + skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS, + flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_dissector_key_ipv4_addrs *ipv4; + + ipv4 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + target_container); + ipv4->src = key->u.ipv4.src; + ipv4->dst = key->u.ipv4.dst; + } + break; + case AF_INET6: + skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS, + flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_dissector_key_ipv6_addrs *ipv6; + + ipv6 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + target_container); + ipv6->src = key->u.ipv6.src; + ipv6->dst = key->u.ipv6.dst; + } + break; + } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *keyid; + + keyid = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + target_container); + keyid->keyid = tunnel_id_to_key32(key->tun_id); + } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_dissector_key_ports *tp; + + tp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + target_container); + tp->src = key->tp_src; + tp->dst = key->tp_dst; + } +} + static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, struct flow_dissector *flow_dissector, @@ -478,6 +575,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb, FLOW_DISSECTOR_KEY_BASIC, target_container); + __skb_flow_dissect_tunnel_info(skb, flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct ethhdr *eth = eth_hdr(skb); diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 1307731ddfe4..e7e626fb87bb 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, */ preempt_disable(); rcu_read_lock(); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); rcu_read_unlock(); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 16a1a4c4eb57..6ea3a1a7f36a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey) { struct neighbour *n; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val; struct neigh_hash_table *nht; @@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { u32 hash_val; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; int error; struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); struct neigh_hash_table *nht; @@ -572,7 +572,7 @@ out_neigh_release: } EXPORT_SYMBOL(__neigh_create); -static u32 pneigh_hash(const void *pkey, int key_len) +static u32 pneigh_hash(const void *pkey, unsigned int key_len) { u32 hash_val = *(u32 *)(pkey + key_len - 4); hash_val ^= (hash_val >> 16); @@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len) static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, struct net *net, const void *pkey, - int key_len, + unsigned int key_len, struct net_device *dev) { while (n) { @@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); return __pneigh_lookup_1(tbl->phash_buckets[hash_val], @@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net_device *dev, int creat) { struct pneigh_entry *n; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); read_lock_bh(&tbl->lock); @@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { struct pneigh_entry *n, **np; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); write_lock_bh(&tbl->lock); @@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, if (tbl == NULL) return -EAFNOSUPPORT; - if (nla_len(dst_attr) < tbl->key_len) + if (nla_len(dst_attr) < (int)tbl->key_len) goto out; if (ndm->ndm_flags & NTF_PROXY) { @@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (tbl == NULL) return -EAFNOSUPPORT; - if (nla_len(tb[NDA_DST]) < tbl->key_len) + if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) goto out; dst = nla_data(tb[NDA_DST]); lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 927a6dcbad96..51d5836d8fb9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -391,10 +391,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, if (len > 0 && buf[len - 1] == '\n') --count; - if (!rtnl_trylock()) - return restart_syscall(); ret = dev_set_alias(netdev, buf, count); - rtnl_unlock(); return ret < 0 ? ret : len; } @@ -403,13 +400,12 @@ static ssize_t ifalias_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct net_device *netdev = to_net_dev(dev); + char tmp[IFALIASZ]; ssize_t ret = 0; - if (!rtnl_trylock()) - return restart_syscall(); - if (netdev->ifalias) - ret = sprintf(buf, "%s\n", netdev->ifalias); - rtnl_unlock(); + ret = dev_get_alias(netdev, tmp, sizeof(tmp)); + if (ret > 0) + ret = sprintf(buf, "%s\n", tmp); return ret; } static DEVICE_ATTR_RW(ifalias); @@ -1488,7 +1484,10 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); - kfree(dev->ifalias); + /* no need to wait for rcu grace period: + * device is dead and about to be freed. + */ + kfree(rcu_access_pointer(dev->ifalias)); netdev_freemem(dev); } diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 71f209542364..380934580fa1 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -32,6 +32,7 @@ #include <trace/events/napi.h> #include <trace/events/sock.h> #include <trace/events/udp.h> +#include <trace/events/tcp.h> #include <trace/events/fib.h> #include <trace/events/qdisc.h> #if IS_ENABLED(CONFIG_IPV6) @@ -49,3 +50,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); + +EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5ace48926b19..de24d394c69e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -453,7 +453,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) { const struct rtnl_af_ops *ops; - list_for_each_entry(ops, &rtnl_af_ops, list) { + list_for_each_entry_rcu(ops, &rtnl_af_ops, list) { if (ops->family == family) return ops; } @@ -470,32 +470,22 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) void rtnl_af_register(struct rtnl_af_ops *ops) { rtnl_lock(); - list_add_tail(&ops->list, &rtnl_af_ops); + list_add_tail_rcu(&ops->list, &rtnl_af_ops); rtnl_unlock(); } EXPORT_SYMBOL_GPL(rtnl_af_register); /** - * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. - * @ops: struct rtnl_af_ops * to unregister - * - * The caller must hold the rtnl_mutex. - */ -void __rtnl_af_unregister(struct rtnl_af_ops *ops) -{ - list_del(&ops->list); -} -EXPORT_SYMBOL_GPL(__rtnl_af_unregister); - -/** * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. * @ops: struct rtnl_af_ops * to unregister */ void rtnl_af_unregister(struct rtnl_af_ops *ops) { rtnl_lock(); - __rtnl_af_unregister(ops); + list_del_rcu(&ops->list); rtnl_unlock(); + + synchronize_rcu(); } EXPORT_SYMBOL_GPL(rtnl_af_unregister); @@ -508,13 +498,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev, /* IFLA_AF_SPEC */ size = nla_total_size(sizeof(struct nlattr)); - list_for_each_entry(af_ops, &rtnl_af_ops, list) { + rcu_read_lock(); + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->get_link_af_size) { /* AF_* + nested data */ size += nla_total_size(sizeof(struct nlattr)) + af_ops->get_link_af_size(dev, ext_filter_mask); } } + rcu_read_unlock(); return size; } @@ -522,11 +514,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev, static bool rtnl_have_link_slave_info(const struct net_device *dev) { struct net_device *master_dev; + bool ret = false; - master_dev = netdev_master_upper_dev_get((struct net_device *) dev); + rcu_read_lock(); + + master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev); if (master_dev && master_dev->rtnl_link_ops) - return true; - return false; + ret = true; + rcu_read_unlock(); + return ret; } static int rtnl_link_slave_info_fill(struct sk_buff *skb, @@ -923,6 +919,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + rtnl_xdp_size() /* IFLA_XDP */ + nla_total_size(4) /* IFLA_EVENT */ + + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1211,6 +1208,36 @@ nla_put_vfinfo_failure: return -EMSGSIZE; } +static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, + struct net_device *dev, + u32 ext_filter_mask) +{ + struct nlattr *vfinfo; + int i, num_vfs; + + if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0)) + return 0; + + num_vfs = dev_num_vf(dev->dev.parent); + if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs)) + return -EMSGSIZE; + + if (!dev->netdev_ops->ndo_get_vf_config) + return 0; + + vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + if (!vfinfo) + return -EMSGSIZE; + + for (i = 0; i < num_vfs; i++) { + if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) + return -EMSGSIZE; + } + + nla_nest_end(skb, vfinfo); + return 0; +} + static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { struct rtnl_link_ifmap map; @@ -1307,16 +1334,106 @@ static u32 rtnl_get_event(unsigned long event) return rtnl_event_type; } +static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev) +{ + const struct net_device *upper_dev; + int ret = 0; + + rcu_read_lock(); + + upper_dev = netdev_master_upper_dev_get_rcu(dev); + if (upper_dev) + ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex); + + rcu_read_unlock(); + return ret; +} + +static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev) +{ + int ifindex = dev_get_iflink(dev); + + if (dev->ifindex == ifindex) + return 0; + + return nla_put_u32(skb, IFLA_LINK, ifindex); +} + +static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, + struct net_device *dev) +{ + char buf[IFALIASZ]; + int ret; + + ret = dev_get_alias(dev, buf, sizeof(buf)); + return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0; +} + +static int rtnl_fill_link_netnsid(struct sk_buff *skb, + const struct net_device *dev) +{ + if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { + struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); + + if (!net_eq(dev_net(dev), link_net)) { + int id = peernet2id_alloc(dev_net(dev), link_net); + + if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) + return -EMSGSIZE; + } + } + + return 0; +} + +static int rtnl_fill_link_af(struct sk_buff *skb, + const struct net_device *dev, + u32 ext_filter_mask) +{ + const struct rtnl_af_ops *af_ops; + struct nlattr *af_spec; + + af_spec = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af_spec) + return -EMSGSIZE; + + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { + struct nlattr *af; + int err; + + if (!af_ops->fill_link_af) + continue; + + af = nla_nest_start(skb, af_ops->family); + if (!af) + return -EMSGSIZE; + + err = af_ops->fill_link_af(skb, dev, ext_filter_mask); + /* + * Caller may return ENODATA to indicate that there + * was no data to be dumped. This is not an error, it + * means we should trim the attribute header and + * continue. + */ + if (err == -ENODATA) + nla_nest_cancel(skb, af); + else if (err < 0) + return -EMSGSIZE; + + nla_nest_end(skb, af); + } + + nla_nest_end(skb, af_spec); + return 0; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, - u32 event) + u32 event, int *new_nsid) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; - struct nlattr *af_spec; - struct rtnl_af_ops *af_ops; - struct net_device *upper_dev = netdev_master_upper_dev_get(dev); ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -1345,15 +1462,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif - (dev->ifindex != dev_get_iflink(dev) && - nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || - (upper_dev && - nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) || + nla_put_iflink(skb, dev) || + put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || - (dev->ifalias && - nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) || + nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_changes)) || nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) @@ -1385,27 +1499,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_fill_stats(skb, dev)) goto nla_put_failure; - if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) && - nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent))) + if (rtnl_fill_vf(skb, dev, ext_filter_mask)) goto nla_put_failure; - if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent && - ext_filter_mask & RTEXT_FILTER_VF) { - int i; - struct nlattr *vfinfo; - int num_vfs = dev_num_vf(dev->dev.parent); - - vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); - if (!vfinfo) - goto nla_put_failure; - for (i = 0; i < num_vfs; i++) { - if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) - goto nla_put_failure; - } - - nla_nest_end(skb, vfinfo); - } - if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; @@ -1417,51 +1513,23 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } - if (dev->rtnl_link_ops && - dev->rtnl_link_ops->get_link_net) { - struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); - - if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id_alloc(dev_net(dev), link_net); - - if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) - goto nla_put_failure; - } - } - - if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) + if (rtnl_fill_link_netnsid(skb, dev)) goto nla_put_failure; - list_for_each_entry(af_ops, &rtnl_af_ops, list) { - if (af_ops->fill_link_af) { - struct nlattr *af; - int err; - - if (!(af = nla_nest_start(skb, af_ops->family))) - goto nla_put_failure; - - err = af_ops->fill_link_af(skb, dev, ext_filter_mask); - - /* - * Caller may return ENODATA to indicate that there - * was no data to be dumped. This is not an error, it - * means we should trim the attribute header and - * continue. - */ - if (err == -ENODATA) - nla_nest_cancel(skb, af); - else if (err < 0) - goto nla_put_failure; - - nla_nest_end(skb, af); - } - } + if (new_nsid && + nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) + goto nla_put_failure; - nla_nest_end(skb, af_spec); + rcu_read_lock(); + if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) + goto nla_put_failure_rcu; + rcu_read_unlock(); nlmsg_end(skb, nlh); return 0; +nla_put_failure_rcu: + rcu_read_unlock(); nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -1661,7 +1729,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask, 0); + ext_filter_mask, 0, NULL); if (err < 0) { if (likely(skb->len)) @@ -1726,17 +1794,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops; - if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + rcu_read_lock(); + af_ops = rtnl_af_lookup(nla_type(af)); + if (!af_ops) { + rcu_read_unlock(); return -EAFNOSUPPORT; + } - if (!af_ops->set_link_af) + if (!af_ops->set_link_af) { + rcu_read_unlock(); return -EOPNOTSUPP; + } if (af_ops->validate_link_af) { err = af_ops->validate_link_af(dev, af); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); return err; + } } + + rcu_read_unlock(); } } @@ -1912,7 +1990,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } -static int do_set_master(struct net_device *dev, int ifindex) +static int do_set_master(struct net_device *dev, int ifindex, + struct netlink_ext_ack *extack) { struct net_device *upper_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops; @@ -1937,7 +2016,7 @@ static int do_set_master(struct net_device *dev, int ifindex) return -EINVAL; ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { - err = ops->ndo_add_slave(upper_dev, dev); + err = ops->ndo_add_slave(upper_dev, dev, extack); if (err) return err; } else { @@ -2070,7 +2149,7 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -2193,13 +2272,17 @@ static int do_setlink(const struct sk_buff *skb, nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops; - if (!(af_ops = rtnl_af_lookup(nla_type(af)))) - BUG(); + rcu_read_lock(); + + BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af)))); err = af_ops->set_link_af(dev, af); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); goto errout; + } + rcu_read_unlock(); status |= DO_SETLINK_NOTIFY; } } @@ -2579,12 +2662,6 @@ replay: return err; slave_data = slave_attr; } - if (m_ops->slave_validate) { - err = m_ops->slave_validate(tb, slave_data, - extack); - if (err < 0) - return err; - } } if (dev) { @@ -2714,7 +2791,8 @@ replay: goto out_unregister; } if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), + extack); if (err) goto out_unregister; } @@ -2774,7 +2852,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0); + nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0, NULL); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -2859,7 +2937,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned int change, - u32 event, gfp_t flags) + u32 event, gfp_t flags, int *new_nsid) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2870,7 +2948,8 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event, + new_nsid); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -2893,14 +2972,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) static void rtmsg_ifinfo_event(int type, struct net_device *dev, unsigned int change, u32 event, - gfp_t flags) + gfp_t flags, int *new_nsid) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; - skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags); + skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid); if (skb) rtmsg_ifinfo_send(skb, dev, flags); } @@ -2908,9 +2987,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev, void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags) { - rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags); + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL); +} + +void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, + gfp_t flags, int *new_nsid) +{ + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, + new_nsid); } -EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, @@ -3017,21 +3102,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_add); -static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid) +static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid, + struct netlink_ext_ack *extack) { u16 vid = 0; if (vlan_attr) { if (nla_len(vlan_attr) != sizeof(u16)) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n"); + NL_SET_ERR_MSG(extack, "invalid vlan attribute size"); return -EINVAL; } vid = nla_get_u16(vlan_attr); if (!vid || vid >= VLAN_VID_MASK) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n", - vid); + NL_SET_ERR_MSG(extack, "invalid vlan id"); return -EINVAL; } } @@ -3056,24 +3141,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n"); + NL_SET_ERR_MSG(extack, "invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n"); + NL_SET_ERR_MSG(extack, "invalid address"); return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); - err = fdb_vid_parse(tb[NDA_VLAN], &vid); + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; @@ -3160,24 +3245,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n"); + NL_SET_ERR_MSG(extack, "invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + NL_SET_ERR_MSG(extack, "invalid address"); return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); - err = fdb_vid_parse(tb[NDA_VLAN], &vid); + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; @@ -3617,7 +3702,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) { - pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } @@ -3692,7 +3777,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) { - pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } @@ -3943,25 +4028,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (!attr) goto nla_put_failure; - list_for_each_entry(af_ops, &rtnl_af_ops, list) { + rcu_read_lock(); + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->fill_stats_af) { struct nlattr *af; int err; af = nla_nest_start(skb, af_ops->family); - if (!af) + if (!af) { + rcu_read_unlock(); goto nla_put_failure; - + } err = af_ops->fill_stats_af(skb, dev); - if (err == -ENODATA) + if (err == -ENODATA) { nla_nest_cancel(skb, af); - else if (err < 0) + } else if (err < 0) { + rcu_read_unlock(); goto nla_put_failure; + } nla_nest_end(skb, af); } } + rcu_read_unlock(); nla_nest_end(skb, attr); @@ -4030,7 +4120,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, /* for IFLA_STATS_AF_SPEC */ size += nla_total_size(0); - list_for_each_entry(af_ops, &rtnl_af_ops, list) { + rcu_read_lock(); + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->get_stats_af_size) { size += nla_total_size( af_ops->get_stats_af_size(dev)); @@ -4039,6 +4130,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, size += nla_total_size(0); } } + rcu_read_unlock(); } return size; @@ -4292,9 +4384,10 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_CHANGEUPPER: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: + case NETDEV_CHANGELOWERSTATE: case NETDEV_CHANGE_TX_QUEUE_LEN: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), - GFP_KERNEL); + GFP_KERNEL, NULL); break; default: break; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 24656076906d..97e604d55d55 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1354,8 +1354,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) /* Set the tail pointer and length */ skb_put(n, skb->len); - if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) - BUG(); + BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); copy_skb_header(n, skb); return n; @@ -1453,8 +1452,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, BUG_ON(nhead < 0); - if (skb_shared(skb)) - BUG(); + BUG_ON(skb_shared(skb)); size = SKB_DATA_ALIGN(size); @@ -1513,6 +1511,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); + skb_metadata_clear(skb); + /* It is not generally safe to change skb->truesize. * For the moment, we really care of rx path, or * when skb is orphaned (not attached to a socket). @@ -1597,9 +1597,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, head_copy_off = newheadroom - head_copy_len; /* Copy the linear header and data. */ - if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, - skb->len + head_copy_len)) - BUG(); + BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, + skb->len + head_copy_len)); copy_skb_header(n, skb); @@ -1880,8 +1879,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) return NULL; } - if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)) - BUG(); + BUG_ON(skb_copy_bits(skb, skb_headlen(skb), + skb_tail_pointer(skb), delta)); /* Optimization: no fragments, no reasons to preestimate * size of pulled pages. Superb. @@ -2852,12 +2851,15 @@ EXPORT_SYMBOL(skb_queue_purge); */ void skb_rbtree_purge(struct rb_root *root) { - struct sk_buff *skb, *next; + struct rb_node *p = rb_first(root); - rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) - kfree_skb(skb); + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); - *root = RB_ROOT; + p = rb_next(p); + rb_erase(&skb->rbnode, root); + kfree_skb(skb); + } } /** @@ -4766,6 +4768,7 @@ EXPORT_SYMBOL(kfree_skb_partial); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize) { + struct skb_shared_info *to_shinfo, *from_shinfo; int i, delta, len = from->len; *fragstolen = false; @@ -4780,7 +4783,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, return true; } - if (skb_has_frag_list(to) || skb_has_frag_list(from)) + to_shinfo = skb_shinfo(to); + from_shinfo = skb_shinfo(from); + if (to_shinfo->frag_list || from_shinfo->frag_list) return false; if (skb_zcopy(to) || skb_zcopy(from)) return false; @@ -4789,8 +4794,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, struct page *page; unsigned int offset; - if (skb_shinfo(to)->nr_frags + - skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + if (to_shinfo->nr_frags + + from_shinfo->nr_frags >= MAX_SKB_FRAGS) return false; if (skb_head_is_locked(from)) @@ -4801,12 +4806,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); - skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, + skb_fill_page_desc(to, to_shinfo->nr_frags, page, offset, skb_headlen(from)); *fragstolen = true; } else { - if (skb_shinfo(to)->nr_frags + - skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) + if (to_shinfo->nr_frags + + from_shinfo->nr_frags > MAX_SKB_FRAGS) return false; delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); @@ -4814,19 +4819,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, WARN_ON_ONCE(delta < len); - memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, - skb_shinfo(from)->frags, - skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); - skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; + memcpy(to_shinfo->frags + to_shinfo->nr_frags, + from_shinfo->frags, + from_shinfo->nr_frags * sizeof(skb_frag_t)); + to_shinfo->nr_frags += from_shinfo->nr_frags; if (!skb_cloned(from)) - skb_shinfo(from)->nr_frags = 0; + from_shinfo->nr_frags = 0; /* if the skb is not cloned this does nothing * since we set nr_frags to 0. */ - for (i = 0; i < skb_shinfo(from)->nr_frags; i++) - skb_frag_ref(from, i); + for (i = 0; i < from_shinfo->nr_frags; i++) + __skb_frag_ref(&from_shinfo->frags[i]); to->truesize += delta; to->len += len; diff --git a/net/core/sock.c b/net/core/sock.c index 415f441c63b9..759400053110 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2685,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk_init_common(sk); sk->sk_send_head = NULL; - init_timer(&sk->sk_timer); + timer_setup(&sk->sk_timer, NULL, 0); sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; |