diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-16 00:22:29 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-16 00:22:29 +0300 |
commit | d635a69dd4981cc51f90293f5f64268620ed1565 (patch) | |
tree | 5e0a758b402ea7d624c25c3a343545dd29e80f31 /net/ipv4/nexthop.c | |
parent | ac73e3dc8acd0a3be292755db30388c3580f5674 (diff) | |
parent | efd5a1584537698220578227e6467638307c2a0b (diff) | |
download | linux-d635a69dd4981cc51f90293f5f64268620ed1565.tar.xz |
Merge tag 'net-next-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
"Core:
- support "prefer busy polling" NAPI operation mode, where we defer
softirq for some time expecting applications to periodically busy
poll
- AF_XDP: improve efficiency by more batching and hindering the
adjacency cache prefetcher
- af_packet: make packet_fanout.arr size configurable up to 64K
- tcp: optimize TCP zero copy receive in presence of partial or
unaligned reads making zero copy a performance win for much smaller
messages
- XDP: add bulk APIs for returning / freeing frames
- sched: support fragmenting IP packets as they come out of conntrack
- net: allow virtual netdevs to forward UDP L4 and fraglist GSO skbs
BPF:
- BPF switch from crude rlimit-based to memcg-based memory accounting
- BPF type format information for kernel modules and related tracing
enhancements
- BPF implement task local storage for BPF LSM
- allow the FENTRY/FEXIT/RAW_TP tracing programs to use
bpf_sk_storage
Protocols:
- mptcp: improve multiple xmit streams support, memory accounting and
many smaller improvements
- TLS: support CHACHA20-POLY1305 cipher
- seg6: add support for SRv6 End.DT4/DT6 behavior
- sctp: Implement RFC 6951: UDP Encapsulation of SCTP
- ppp_generic: add ability to bridge channels directly
- bridge: Connectivity Fault Management (CFM) support as is defined
in IEEE 802.1Q section 12.14.
Drivers:
- mlx5: make use of the new auxiliary bus to organize the driver
internals
- mlx5: more accurate port TX timestamping support
- mlxsw:
- improve the efficiency of offloaded next hop updates by using
the new nexthop object API
- support blackhole nexthops
- support IEEE 802.1ad (Q-in-Q) bridging
- rtw88: major bluetooth co-existance improvements
- iwlwifi: support new 6 GHz frequency band
- ath11k: Fast Initial Link Setup (FILS)
- mt7915: dual band concurrent (DBDC) support
- net: ipa: add basic support for IPA v4.5
Refactor:
- a few pieces of in_interrupt() cleanup work from Sebastian Andrzej
Siewior
- phy: add support for shared interrupts; get rid of multiple driver
APIs and have the drivers write a full IRQ handler, slight growth
of driver code should be compensated by the simpler API which also
allows shared IRQs
- add common code for handling netdev per-cpu counters
- move TX packet re-allocation from Ethernet switch tag drivers to a
central place
- improve efficiency and rename nla_strlcpy
- number of W=1 warning cleanups as we now catch those in a patchwork
build bot
Old code removal:
- wan: delete the DLCI / SDLA drivers
- wimax: move to staging
- wifi: remove old WDS wifi bridging support"
* tag 'net-next-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1922 commits)
net: hns3: fix expression that is currently always true
net: fix proc_fs init handling in af_packet and tls
nfc: pn533: convert comma to semicolon
af_vsock: Assign the vsock transport considering the vsock address flags
af_vsock: Set VMADDR_FLAG_TO_HOST flag on the receive path
vsock_addr: Check for supported flag values
vm_sockets: Add VMADDR_FLAG_TO_HOST vsock flag
vm_sockets: Add flags field in the vsock address data structure
net: Disable NETIF_F_HW_TLS_TX when HW_CSUM is disabled
tcp: Add logic to check for SYN w/ data in tcp_simple_retransmit
net: mscc: ocelot: install MAC addresses in .ndo_set_rx_mode from process context
nfc: s3fwrn5: Release the nfc firmware
net: vxget: clean up sparse warnings
mlxsw: spectrum_router: Use eXtended mezzanine to offload IPv4 router
mlxsw: spectrum: Set KVH XLT cache mode for Spectrum2/3
mlxsw: spectrum_router_xm: Introduce basic XM cache flushing
mlxsw: reg: Add Router LPM Cache Enable Register
mlxsw: reg: Add Router LPM Cache ML Delete Register
mlxsw: spectrum_router_xm: Implement L-value tracking for M-index
mlxsw: reg: Add XM Router M Table Register
...
Diffstat (limited to 'net/ipv4/nexthop.c')
-rw-r--r-- | net/ipv4/nexthop.c | 255 |
1 files changed, 244 insertions, 11 deletions
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 0dc43ad28eb9..5e1b22d4f939 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -36,14 +36,145 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { [NHA_FDB] = { .type = NLA_FLAG }, }; +static bool nexthop_notifiers_is_empty(struct net *net) +{ + return !net->nexthop.notifier_chain.head; +} + +static void +__nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, + const struct nexthop *nh) +{ + struct nh_info *nhi = rtnl_dereference(nh->nh_info); + + nh_info->dev = nhi->fib_nhc.nhc_dev; + nh_info->gw_family = nhi->fib_nhc.nhc_gw_family; + if (nh_info->gw_family == AF_INET) + nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4; + else if (nh_info->gw_family == AF_INET6) + nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; + + nh_info->is_reject = nhi->reject_nh; + nh_info->is_fdb = nhi->fdb_nh; + nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; +} + +static int nh_notifier_single_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); + if (!info->nh) + return -ENOMEM; + + __nh_notifier_single_info_init(info->nh, nh); + + return 0; +} + +static void nh_notifier_single_info_fini(struct nh_notifier_info *info) +{ + kfree(info->nh); +} + +static int nh_notifier_grp_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + u16 num_nh = nhg->num_nh; + int i; + + info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), + GFP_KERNEL); + if (!info->nh_grp) + return -ENOMEM; + + info->nh_grp->num_nh = num_nh; + info->nh_grp->is_fdb = nhg->fdb_nh; + + for (i = 0; i < num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + + info->nh_grp->nh_entries[i].id = nhge->nh->id; + info->nh_grp->nh_entries[i].weight = nhge->weight; + __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, + nhge->nh); + } + + return 0; +} + +static void nh_notifier_grp_info_fini(struct nh_notifier_info *info) +{ + kfree(info->nh_grp); +} + +static int nh_notifier_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + info->id = nh->id; + info->is_grp = nh->is_group; + + if (info->is_grp) + return nh_notifier_grp_info_init(info, nh); + else + return nh_notifier_single_info_init(info, nh); +} + +static void nh_notifier_info_fini(struct nh_notifier_info *info) +{ + if (info->is_grp) + nh_notifier_grp_info_fini(info); + else + nh_notifier_single_info_fini(info); +} + static int call_nexthop_notifiers(struct net *net, enum nexthop_event_type event_type, - struct nexthop *nh) + struct nexthop *nh, + struct netlink_ext_ack *extack) { + struct nh_notifier_info info = { + .net = net, + .extack = extack, + }; int err; + ASSERT_RTNL(); + + if (nexthop_notifiers_is_empty(net)) + return 0; + + err = nh_notifier_info_init(&info, nh); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); + return err; + } + err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, - event_type, nh); + event_type, &info); + nh_notifier_info_fini(&info); + + return notifier_to_errno(err); +} + +static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, + enum nexthop_event_type event_type, + struct nexthop *nh, + struct netlink_ext_ack *extack) +{ + struct nh_notifier_info info = { + .net = net, + .extack = extack, + }; + int err; + + err = nh_notifier_info_init(&info, nh); + if (err) + return err; + + err = nb->notifier_call(nb, event_type, &info); + nh_notifier_info_fini(&info); + return notifier_to_errno(err); } @@ -782,9 +913,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, { struct nh_grp_entry *nhges, *new_nhges; struct nexthop *nhp = nhge->nh_parent; + struct netlink_ext_ack extack; struct nexthop *nh = nhge->nh; struct nh_group *nhg, *newg; - int i, j; + int i, j, err; WARN_ON(!nh); @@ -832,6 +964,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, list_del(&nhge->nh_list); nexthop_put(nhge->nh); + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack); + if (err) + pr_err("%s\n", extack._msg); + if (nlinfo) nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); } @@ -907,7 +1043,7 @@ static void __remove_nexthop(struct net *net, struct nexthop *nh, static void remove_nexthop(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { - call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh); + call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL); /* remove from the tree */ rb_erase(&nh->rb_node, &net->nexthop.rb_root); @@ -940,13 +1076,17 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old, struct netlink_ext_ack *extack) { struct nh_group *oldg, *newg; - int i; + int i, err; if (!new->is_group) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); return -EINVAL; } + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); + if (err) + return err; + oldg = rtnl_dereference(old->nh_grp); newg = rtnl_dereference(new->nh_grp); @@ -985,31 +1125,54 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old, struct nexthop *new, struct netlink_ext_ack *extack) { + u8 old_protocol, old_nh_flags; struct nh_info *oldi, *newi; + struct nh_grp_entry *nhge; + int err; if (new->is_group) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group."); return -EINVAL; } + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); + if (err) + return err; + + /* Hardware flags were set on 'old' as 'new' is not in the red-black + * tree. Therefore, inherit the flags from 'old' to 'new'. + */ + new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP); + oldi = rtnl_dereference(old->nh_info); newi = rtnl_dereference(new->nh_info); newi->nh_parent = old; oldi->nh_parent = new; + old_protocol = old->protocol; + old_nh_flags = old->nh_flags; + old->protocol = new->protocol; old->nh_flags = new->nh_flags; rcu_assign_pointer(old->nh_info, newi); rcu_assign_pointer(new->nh_info, oldi); + /* Send a replace notification for all the groups using the nexthop. */ + list_for_each_entry(nhge, &old->grp_list, nh_list) { + struct nexthop *nhp = nhge->nh_parent; + + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, + extack); + if (err) + goto err_notify; + } + /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially * update IPv4 indication in all the groups using the nexthop. */ if (oldi->family == AF_INET && newi->family == AF_INET6) { - struct nh_grp_entry *nhge; - list_for_each_entry(nhge, &old->grp_list, nh_list) { struct nexthop *nhp = nhge->nh_parent; struct nh_group *nhg; @@ -1020,6 +1183,21 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old, } return 0; + +err_notify: + rcu_assign_pointer(new->nh_info, newi); + rcu_assign_pointer(old->nh_info, oldi); + old->nh_flags = old_nh_flags; + old->protocol = old_protocol; + oldi->nh_parent = old; + newi->nh_parent = new; + list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) { + struct nexthop *nhp = nhge->nh_parent; + + call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack); + } + call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack); + return err; } static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, @@ -1168,7 +1346,11 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, rb_link_node_rcu(&new_nh->rb_node, parent, pp); rb_insert_color(&new_nh->rb_node, root); - rc = 0; + + rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack); + if (rc) + rb_erase(&new_nh->rb_node, &net->nexthop.rb_root); + out: if (!rc) { nh_base_seq_inc(net); @@ -1957,10 +2139,40 @@ static struct notifier_block nh_netdev_notifier = { .notifier_call = nh_netdev_event, }; -int register_nexthop_notifier(struct net *net, struct notifier_block *nb) +static int nexthops_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + struct rb_root *root = &net->nexthop.rb_root; + struct rb_node *node; + int err = 0; + + for (node = rb_first(root); node; node = rb_next(node)) { + struct nexthop *nh; + + nh = rb_entry(node, struct nexthop, rb_node); + err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh, + extack); + if (err) + break; + } + + return err; +} + +int register_nexthop_notifier(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return blocking_notifier_chain_register(&net->nexthop.notifier_chain, - nb); + int err; + + rtnl_lock(); + err = nexthops_dump(net, nb, extack); + if (err) + goto unlock; + err = blocking_notifier_chain_register(&net->nexthop.notifier_chain, + nb); +unlock: + rtnl_unlock(); + return err; } EXPORT_SYMBOL(register_nexthop_notifier); @@ -1971,6 +2183,27 @@ int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) } EXPORT_SYMBOL(unregister_nexthop_notifier); +void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap) +{ + struct nexthop *nexthop; + + rcu_read_lock(); + + nexthop = nexthop_find_by_id(net, id); + if (!nexthop) + goto out; + + nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); + if (offload) + nexthop->nh_flags |= RTNH_F_OFFLOAD; + if (trap) + nexthop->nh_flags |= RTNH_F_TRAP; + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL(nexthop_set_hw_flags); + static void __net_exit nexthop_net_exit(struct net *net) { rtnl_lock(); |