summaryrefslogtreecommitdiff
path: root/net/l3mdev
diff options
context:
space:
mode:
authorDavid Ahern <dsahern@kernel.org>2022-03-14 23:45:51 +0300
committerJakub Kicinski <kuba@kernel.org>2022-03-16 06:20:02 +0300
commit40867d74c374b235e14d839f3a77f26684feefe5 (patch)
treec48cb6e159ac445e5171fe550d9c58437d46a0e0 /net/l3mdev
parentc84d86a0295c24487db5b7db1a61d9c0eddfbb66 (diff)
downloadlinux-40867d74c374b235e14d839f3a77f26684feefe5.tar.xz
net: Add l3mdev index to flow struct and avoid oif reset for port devices
The fundamental premise of VRF and l3mdev core code is binding a socket to a device (l3mdev or netdev with an L3 domain) to indicate L3 scope. Legacy code resets flowi_oif to the l3mdev losing any original port device binding. Ben (among others) has demonstrated use cases where the original port device binding is important and needs to be retained. This patch handles that by adding a new entry to the common flow struct that can indicate the l3mdev index for later rule and table matching avoiding the need to reset flowi_oif. In addition to allowing more use cases that require port device binds, this patch brings a few datapath simplications: 1. l3mdev_fib_rule_match is only called when walking fib rules and always after l3mdev_update_flow. That allows an optimization to bail early for non-VRF type uses cases when flowi_l3mdev is not set. Also, only that index needs to be checked for the FIB table id. 2. l3mdev_update_flow can be called with flowi_oif set to a l3mdev (e.g., VRF) device. By resetting flowi_oif only for this case the FLOWI_FLAG_SKIP_NH_OIF flag is not longer needed and can be removed, removing several checks in the datapath. The flowi_iif path can be simplified to only be called if the it is not loopback (loopback can not be assigned to an L3 domain) and the l3mdev index is not already set. 3. Avoid another device lookup in the output path when the fib lookup returns a reject failure. Note: 2 functional tests for local traffic with reject fib rules are updated to reflect the new direct failure at FIB lookup time for ping rather than the failure on packet path. The current code fails like this: HINT: Fails since address on vrf device is out of device scope COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 ping: Warning: source address might be selected on device other than: eth1 PING 172.16.3.1 (172.16.3.1) from 172.16.3.1 eth1: 56(84) bytes of data. --- 172.16.3.1 ping statistics --- 1 packets transmitted, 0 received, 100% packet loss, time 0ms where the test now directly fails: HINT: Fails since address on vrf device is out of device scope COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 ping: connect: No route to host Signed-off-by: David Ahern <dsahern@kernel.org> Tested-by: Ben Greear <greearb@candelatech.com> Link: https://lore.kernel.org/r/20220314204551.16369-1-dsahern@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/l3mdev')
-rw-r--r--net/l3mdev/l3mdev.c43
1 files changed, 17 insertions, 26 deletions
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 17927966abb3..4eb8892fb2ff 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -250,25 +250,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct net_device *dev;
int rc = 0;
- rcu_read_lock();
+ /* update flow ensures flowi_l3mdev is set when relevant */
+ if (!fl->flowi_l3mdev)
+ return 0;
- dev = dev_get_by_index_rcu(net, fl->flowi_oif);
- if (dev && netif_is_l3_master(dev) &&
- dev->l3mdev_ops->l3mdev_fib_table) {
- arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
- rc = 1;
- goto out;
- }
+ rcu_read_lock();
- dev = dev_get_by_index_rcu(net, fl->flowi_iif);
+ dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev);
if (dev && netif_is_l3_master(dev) &&
dev->l3mdev_ops->l3mdev_fib_table) {
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
rc = 1;
- goto out;
}
-out:
rcu_read_unlock();
return rc;
@@ -277,31 +271,28 @@ out:
void l3mdev_update_flow(struct net *net, struct flowi *fl)
{
struct net_device *dev;
- int ifindex;
rcu_read_lock();
if (fl->flowi_oif) {
dev = dev_get_by_index_rcu(net, fl->flowi_oif);
if (dev) {
- ifindex = l3mdev_master_ifindex_rcu(dev);
- if (ifindex) {
- fl->flowi_oif = ifindex;
- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
- goto out;
- }
+ if (!fl->flowi_l3mdev)
+ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
+
+ /* oif set to L3mdev directs lookup to its table;
+ * reset to avoid oif match in fib_lookup
+ */
+ if (netif_is_l3_master(dev))
+ fl->flowi_oif = 0;
+ goto out;
}
}
- if (fl->flowi_iif) {
+ if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) {
dev = dev_get_by_index_rcu(net, fl->flowi_iif);
- if (dev) {
- ifindex = l3mdev_master_ifindex_rcu(dev);
- if (ifindex) {
- fl->flowi_iif = ifindex;
- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
- }
- }
+ if (dev)
+ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
}
out: