summaryrefslogtreecommitdiff
path: root/drivers/net/vrf.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/vrf.c')
-rw-r--r--drivers/net/vrf.c294
1 files changed, 146 insertions, 148 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 1ce7420322ee..85c271c70d42 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -37,9 +37,6 @@
#include <net/l3mdev.h>
#include <net/fib_rules.h>
-#define RT_FL_TOS(oldflp4) \
- ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
-
#define DRV_NAME "vrf"
#define DRV_VERSION "1.0"
@@ -137,6 +134,20 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
}
#if IS_ENABLED(CONFIG_IPV6)
+static int vrf_ip6_local_out(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net,
+ sk, skb, NULL, skb_dst(skb)->dev, dst_output);
+
+ if (likely(err == 1))
+ err = dst_output(net, sk, skb);
+
+ return err;
+}
+
static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
struct net_device *dev)
{
@@ -151,7 +162,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
.flowlabel = ip6_flowinfo(iph),
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
- .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
+ .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF,
};
int ret = NET_XMIT_DROP;
struct dst_entry *dst;
@@ -207,7 +218,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
/* strip the ethernet header added for pass through VRF device */
__skb_pull(skb, skb_network_offset(skb));
- ret = ip6_local_out(net, skb->sk, skb);
+ ret = vrf_ip6_local_out(net, skb->sk, skb);
if (unlikely(net_xmit_eval(ret)))
dev->stats.tx_errors++;
else
@@ -227,6 +238,20 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
}
#endif
+/* based on ip_local_out; can't use it b/c the dst is switched pointing to us */
+static int vrf_ip_local_out(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+ skb, NULL, skb_dst(skb)->dev, dst_output);
+ if (likely(err == 1))
+ err = dst_output(net, sk, skb);
+
+ return err;
+}
+
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
struct net_device *vrf_dev)
{
@@ -237,8 +262,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
.flowi4_oif = vrf_dev->ifindex,
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_tos = RT_TOS(ip4h->tos),
- .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC |
- FLOWI_FLAG_SKIP_NH_OIF,
+ .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
.daddr = ip4h->daddr,
};
struct net *net = dev_net(vrf_dev);
@@ -292,7 +316,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
RT_SCOPE_LINK);
}
- ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+ ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
if (unlikely(net_xmit_eval(ret)))
vrf_dev->stats.tx_errors++;
else
@@ -377,6 +401,43 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
+/* set dst on skb to send packet to us via dev_xmit path. Allows
+ * packet to go through device based features such as qdisc, netfilter
+ * hooks and packet sockets with skb->dev set to vrf device.
+ */
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct net_vrf *vrf = netdev_priv(vrf_dev);
+ struct dst_entry *dst = NULL;
+ struct rt6_info *rt6;
+
+ /* don't divert link scope packets */
+ if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
+ return skb;
+
+ rcu_read_lock();
+
+ rt6 = rcu_dereference(vrf->rt6);
+ if (likely(rt6)) {
+ dst = &rt6->dst;
+ dst_hold(dst);
+ }
+
+ rcu_read_unlock();
+
+ if (unlikely(!dst)) {
+ vrf_tx_error(vrf_dev, skb);
+ return NULL;
+ }
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ return skb;
+}
+
/* holding rtnl */
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
@@ -463,6 +524,13 @@ out:
return rc;
}
#else
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb)
+{
+ return skb;
+}
+
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
}
@@ -531,6 +599,55 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
+/* set dst on skb to send packet to us via dev_xmit path. Allows
+ * packet to go through device based features such as qdisc, netfilter
+ * hooks and packet sockets with skb->dev set to vrf device.
+ */
+static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct net_vrf *vrf = netdev_priv(vrf_dev);
+ struct dst_entry *dst = NULL;
+ struct rtable *rth;
+
+ rcu_read_lock();
+
+ rth = rcu_dereference(vrf->rth);
+ if (likely(rth)) {
+ dst = &rth->dst;
+ dst_hold(dst);
+ }
+
+ rcu_read_unlock();
+
+ if (unlikely(!dst)) {
+ vrf_tx_error(vrf_dev, skb);
+ return NULL;
+ }
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ return skb;
+}
+
+/* called with rcu lock held */
+static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb,
+ u16 proto)
+{
+ switch (proto) {
+ case AF_INET:
+ return vrf_ip_out(vrf_dev, sk, skb);
+ case AF_INET6:
+ return vrf_ip6_out(vrf_dev, sk, skb);
+ }
+
+ return skb;
+}
+
/* holding rtnl */
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
@@ -722,63 +839,6 @@ static u32 vrf_fib_table(const struct net_device *dev)
return vrf->tb_id;
}
-static struct rtable *vrf_get_rtable(const struct net_device *dev,
- const struct flowi4 *fl4)
-{
- struct rtable *rth = NULL;
-
- if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) {
- struct net_vrf *vrf = netdev_priv(dev);
-
- rcu_read_lock();
-
- rth = rcu_dereference(vrf->rth);
- if (likely(rth))
- dst_hold(&rth->dst);
-
- rcu_read_unlock();
- }
-
- return rth;
-}
-
-/* called under rcu_read_lock */
-static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
-{
- struct fib_result res = { .tclassid = 0 };
- struct net *net = dev_net(dev);
- u32 orig_tos = fl4->flowi4_tos;
- u8 flags = fl4->flowi4_flags;
- u8 scope = fl4->flowi4_scope;
- u8 tos = RT_FL_TOS(fl4);
- int rc;
-
- if (unlikely(!fl4->daddr))
- return 0;
-
- fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
- fl4->flowi4_iif = LOOPBACK_IFINDEX;
- /* make sure oif is set to VRF device for lookup */
- fl4->flowi4_oif = dev->ifindex;
- fl4->flowi4_tos = tos & IPTOS_RT_MASK;
- fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
-
- rc = fib_lookup(net, fl4, &res, 0);
- if (!rc) {
- if (res.type == RTN_LOCAL)
- fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
- else
- fib_select_path(net, &res, fl4, -1);
- }
-
- fl4->flowi4_flags = flags;
- fl4->flowi4_tos = orig_tos;
- fl4->flowi4_scope = scope;
-
- return rc;
-}
-
static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return 0;
@@ -970,106 +1030,44 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
}
#if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
- struct flowi6 *fl6)
+/* send to link-local or multicast address via interface enslaved to
+ * VRF device. Force lookup to VRF table without changing flow struct
+ */
+static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
+ struct flowi6 *fl6)
{
- bool need_strict = rt6_need_strict(&fl6->daddr);
- struct net_vrf *vrf = netdev_priv(dev);
struct net *net = dev_net(dev);
+ int flags = RT6_LOOKUP_F_IFACE;
struct dst_entry *dst = NULL;
struct rt6_info *rt;
- /* send to link-local or multicast address */
- if (need_strict) {
- int flags = RT6_LOOKUP_F_IFACE;
-
- /* VRF device does not have a link-local address and
- * sending packets to link-local or mcast addresses over
- * a VRF device does not make sense
- */
- if (fl6->flowi6_oif == dev->ifindex) {
- struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst;
-
- dst_hold(dst);
- return dst;
- }
-
- if (!ipv6_addr_any(&fl6->saddr))
- flags |= RT6_LOOKUP_F_HAS_SADDR;
-
- rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
- if (rt)
- dst = &rt->dst;
-
- } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
-
- rcu_read_lock();
-
- rt = rcu_dereference(vrf->rt6);
- if (likely(rt)) {
- dst = &rt->dst;
- dst_hold(dst);
- }
-
- rcu_read_unlock();
+ /* VRF device does not have a link-local address and
+ * sending packets to link-local or mcast addresses over
+ * a VRF device does not make sense
+ */
+ if (fl6->flowi6_oif == dev->ifindex) {
+ dst = &net->ipv6.ip6_null_entry->dst;
+ dst_hold(dst);
+ return dst;
}
- /* make sure oif is set to VRF device for lookup */
- if (!need_strict)
- fl6->flowi6_oif = dev->ifindex;
-
- return dst;
-}
-
-/* called under rcu_read_lock */
-static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk,
- struct flowi6 *fl6)
-{
- struct net *net = dev_net(dev);
- struct dst_entry *dst;
- struct rt6_info *rt;
- int err;
-
- if (rt6_need_strict(&fl6->daddr)) {
- rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif,
- RT6_LOOKUP_F_IFACE);
- if (unlikely(!rt))
- return 0;
+ if (!ipv6_addr_any(&fl6->saddr))
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+ rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+ if (rt)
dst = &rt->dst;
- } else {
- __u8 flags = fl6->flowi6_flags;
- fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
- fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF;
-
- dst = ip6_route_output(net, sk, fl6);
- rt = (struct rt6_info *)dst;
-
- fl6->flowi6_flags = flags;
- }
-
- err = dst->error;
- if (!err) {
- err = ip6_route_get_saddr(net, rt, &fl6->daddr,
- sk ? inet6_sk(sk)->srcprefs : 0,
- &fl6->saddr);
- }
-
- dst_release(dst);
-
- return err;
+ return dst;
}
#endif
static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_fib_table = vrf_fib_table,
- .l3mdev_get_rtable = vrf_get_rtable,
- .l3mdev_get_saddr = vrf_get_saddr,
.l3mdev_l3_rcv = vrf_l3_rcv,
+ .l3mdev_l3_out = vrf_l3_out,
#if IS_ENABLED(CONFIG_IPV6)
- .l3mdev_get_rt6_dst = vrf_get_rt6_dst,
- .l3mdev_get_saddr6 = vrf_get_saddr6,
+ .l3mdev_link_scope_lookup = vrf_link_scope_lookup,
#endif
};