diff options
Diffstat (limited to 'drivers/net/vrf.c')
-rw-r--r-- | drivers/net/vrf.c | 431 |
1 files changed, 157 insertions, 274 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9a9fabb900c1..dff08842f26d 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -42,12 +42,9 @@ #define DRV_NAME "vrf" #define DRV_VERSION "1.0" -#define vrf_master_get_rcu(dev) \ - ((struct net_device *)rcu_dereference(dev->rx_handler_data)) - struct net_vrf { - struct rtable *rth; - struct rt6_info *rt6; + struct rtable __rcu *rth; + struct rt6_info __rcu *rt6; u32 tb_id; }; @@ -60,125 +57,12 @@ struct pcpu_dstats { struct u64_stats_sync syncp; }; -static struct dst_entry *vrf_ip_check(struct dst_entry *dst, u32 cookie) -{ - return dst; -} - -static int vrf_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - return ip_local_out(net, sk, skb); -} - -static unsigned int vrf_v4_mtu(const struct dst_entry *dst) -{ - /* TO-DO: return max ethernet size? */ - return dst->dev->mtu; -} - -static void vrf_dst_destroy(struct dst_entry *dst) -{ - /* our dst lives forever - or until the device is closed */ -} - -static unsigned int vrf_default_advmss(const struct dst_entry *dst) -{ - return 65535 - 40; -} - -static struct dst_ops vrf_dst_ops = { - .family = AF_INET, - .local_out = vrf_ip_local_out, - .check = vrf_ip_check, - .mtu = vrf_v4_mtu, - .destroy = vrf_dst_destroy, - .default_advmss = vrf_default_advmss, -}; - -/* neighbor handling is done with actual device; do not want - * to flip skb->dev for those ndisc packets. This really fails - * for multiple next protocols (e.g., NEXTHDR_HOP). But it is - * a start. - */ -#if IS_ENABLED(CONFIG_IPV6) -static bool check_ipv6_frame(const struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - struct ipv6hdr _ipv6h; - bool rc = true; - - ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h); - if (!ipv6h) - goto out; - - if (ipv6h->nexthdr == NEXTHDR_ICMP) { - const struct icmp6hdr *icmph; - struct icmp6hdr _icmph; - - icmph = skb_header_pointer(skb, sizeof(_ipv6h), - sizeof(_icmph), &_icmph); - if (!icmph) - goto out; - - switch (icmph->icmp6_type) { - case NDISC_ROUTER_SOLICITATION: - case NDISC_ROUTER_ADVERTISEMENT: - case NDISC_NEIGHBOUR_SOLICITATION: - case NDISC_NEIGHBOUR_ADVERTISEMENT: - case NDISC_REDIRECT: - rc = false; - break; - } - } - -out: - return rc; -} -#else -static bool check_ipv6_frame(const struct sk_buff *skb) -{ - return false; -} -#endif - -static bool is_ip_rx_frame(struct sk_buff *skb) -{ - switch (skb->protocol) { - case htons(ETH_P_IP): - return true; - case htons(ETH_P_IPV6): - return check_ipv6_frame(skb); - } - return false; -} - static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) { vrf_dev->stats.tx_errors++; kfree_skb(skb); } -/* note: already called with rcu_read_lock */ -static rx_handler_result_t vrf_handle_frame(struct sk_buff **pskb) -{ - struct sk_buff *skb = *pskb; - - if (is_ip_rx_frame(skb)) { - struct net_device *dev = vrf_master_get_rcu(skb->dev); - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - - u64_stats_update_begin(&dstats->syncp); - dstats->rx_pkts++; - dstats->rx_bytes += skb->len; - u64_stats_update_end(&dstats->syncp); - - skb->dev = dev; - - return RX_HANDLER_ANOTHER; - } - return RX_HANDLER_PASS; -} - static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -349,46 +233,6 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) } #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *vrf_ip6_check(struct dst_entry *dst, u32 cookie) -{ - return dst; -} - -static struct dst_ops vrf_dst_ops6 = { - .family = AF_INET6, - .local_out = ip6_local_out, - .check = vrf_ip6_check, - .mtu = vrf_v4_mtu, - .destroy = vrf_dst_destroy, - .default_advmss = vrf_default_advmss, -}; - -static int init_dst_ops6_kmem_cachep(void) -{ - vrf_dst_ops6.kmem_cachep = kmem_cache_create("vrf_ip6_dst_cache", - sizeof(struct rt6_info), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - - if (!vrf_dst_ops6.kmem_cachep) - return -ENOMEM; - - return 0; -} - -static void free_dst_ops6_kmem_cachep(void) -{ - kmem_cache_destroy(vrf_dst_ops6.kmem_cachep); -} - -static int vrf_input6(struct sk_buff *skb) -{ - skb->dev->stats.rx_errors++; - kfree_skb(skb); - return 0; -} - /* modelled after ip6_finish_output2 */ static int vrf_finish_output6(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -429,67 +273,46 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } -static void vrf_rt6_destroy(struct net_vrf *vrf) +/* holding rtnl */ +static void vrf_rt6_release(struct net_vrf *vrf) { - dst_destroy(&vrf->rt6->dst); - free_percpu(vrf->rt6->rt6i_pcpu); - vrf->rt6 = NULL; + struct rt6_info *rt6 = rtnl_dereference(vrf->rt6); + + rcu_assign_pointer(vrf->rt6, NULL); + + if (rt6) + dst_release(&rt6->dst); } static int vrf_rt6_create(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); - struct dst_entry *dst; + struct net *net = dev_net(dev); + struct fib6_table *rt6i_table; struct rt6_info *rt6; - int cpu; int rc = -ENOMEM; - rt6 = dst_alloc(&vrf_dst_ops6, dev, 0, - DST_OBSOLETE_NONE, - (DST_HOST | DST_NOPOLICY | DST_NOXFRM)); - if (!rt6) + rt6i_table = fib6_new_table(net, vrf->tb_id); + if (!rt6i_table) goto out; - dst = &rt6->dst; - - rt6->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_KERNEL); - if (!rt6->rt6i_pcpu) { - dst_destroy(dst); + rt6 = ip6_dst_alloc(net, dev, + DST_HOST | DST_NOPOLICY | DST_NOXFRM | DST_NOCACHE); + if (!rt6) goto out; - } - for_each_possible_cpu(cpu) { - struct rt6_info **p = per_cpu_ptr(rt6->rt6i_pcpu, cpu); - *p = NULL; - } - - memset(dst + 1, 0, sizeof(*rt6) - sizeof(*dst)); - INIT_LIST_HEAD(&rt6->rt6i_siblings); - INIT_LIST_HEAD(&rt6->rt6i_uncached); + dst_hold(&rt6->dst); - rt6->dst.input = vrf_input6; + rt6->rt6i_table = rt6i_table; rt6->dst.output = vrf_output6; + rcu_assign_pointer(vrf->rt6, rt6); - rt6->rt6i_table = fib6_get_table(dev_net(dev), vrf->tb_id); - - atomic_set(&rt6->dst.__refcnt, 2); - - vrf->rt6 = rt6; rc = 0; out: return rc; } #else -static int init_dst_ops6_kmem_cachep(void) -{ - return 0; -} - -static void free_dst_ops6_kmem_cachep(void) -{ -} - -static void vrf_rt6_destroy(struct net_vrf *vrf) +static void vrf_rt6_release(struct net_vrf *vrf) { } @@ -557,38 +380,35 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -static void vrf_rtable_destroy(struct net_vrf *vrf) +/* holding rtnl */ +static void vrf_rtable_release(struct net_vrf *vrf) { - struct dst_entry *dst = (struct dst_entry *)vrf->rth; + struct rtable *rth = rtnl_dereference(vrf->rth); - dst_destroy(dst); - vrf->rth = NULL; + rcu_assign_pointer(vrf->rth, NULL); + + if (rth) + dst_release(&rth->dst); } -static struct rtable *vrf_rtable_create(struct net_device *dev) +static int vrf_rtable_create(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); struct rtable *rth; - rth = dst_alloc(&vrf_dst_ops, dev, 2, - DST_OBSOLETE_NONE, - (DST_HOST | DST_NOPOLICY | DST_NOXFRM)); - if (rth) { - rth->dst.output = vrf_output; - rth->rt_genid = rt_genid_ipv4(dev_net(dev)); - rth->rt_flags = 0; - rth->rt_type = RTN_UNICAST; - rth->rt_is_input = 0; - rth->rt_iif = 0; - rth->rt_pmtu = 0; - rth->rt_gateway = 0; - rth->rt_uses_gateway = 0; - rth->rt_table_id = vrf->tb_id; - INIT_LIST_HEAD(&rth->rt_uncached); - rth->rt_uncached_list = NULL; - } + if (!fib_new_table(dev_net(dev), vrf->tb_id)) + return -ENOMEM; - return rth; + rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1, 1, 0); + if (!rth) + return -ENOMEM; + + rth->dst.output = vrf_output; + rth->rt_table_id = vrf->tb_id; + + rcu_assign_pointer(vrf->rth, rth); + + return 0; } /**************************** device handling ********************/ @@ -617,28 +437,14 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { int ret; - /* register the packet handler for slave ports */ - ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev); - if (ret) { - netdev_err(port_dev, - "Device %s failed to register rx_handler\n", - port_dev->name); - goto out_fail; - } - ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (ret < 0) - goto out_unregister; + return ret; port_dev->priv_flags |= IFF_L3MDEV_SLAVE; cycle_netdev(port_dev); return 0; - -out_unregister: - netdev_rx_handler_unregister(port_dev); -out_fail: - return ret; } static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) @@ -655,8 +461,6 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) netdev_upper_dev_unlink(port_dev, dev); port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; - netdev_rx_handler_unregister(port_dev); - cycle_netdev(port_dev); return 0; @@ -673,8 +477,8 @@ static void vrf_dev_uninit(struct net_device *dev) struct net_device *port_dev; struct list_head *iter; - vrf_rtable_destroy(vrf); - vrf_rt6_destroy(vrf); + vrf_rtable_release(vrf); + vrf_rt6_release(vrf); netdev_for_each_lower_dev(dev, port_dev, iter) vrf_del_slave(dev, port_dev); @@ -692,8 +496,7 @@ static int vrf_dev_init(struct net_device *dev) goto out_nomem; /* create the default dst which points back to us */ - vrf->rth = vrf_rtable_create(dev); - if (!vrf->rth) + if (vrf_rtable_create(dev) != 0) goto out_stats; if (vrf_rt6_create(dev) != 0) @@ -704,7 +507,7 @@ static int vrf_dev_init(struct net_device *dev) return 0; out_rth: - vrf_rtable_destroy(vrf); + vrf_rtable_release(vrf); out_stats: free_percpu(dev->dstats); dev->dstats = NULL; @@ -736,8 +539,13 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) { struct net_vrf *vrf = netdev_priv(dev); - rth = vrf->rth; - atomic_inc(&rth->dst.__refcnt); + rcu_read_lock(); + + rth = rcu_dereference(vrf->rth); + if (likely(rth)) + dst_hold(&rth->dst); + + rcu_read_unlock(); } return rth; @@ -759,6 +567,8 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF; fl4->flowi4_iif = LOOPBACK_IFINDEX; + /* make sure oif is set to VRF device for lookup */ + fl4->flowi4_oif = dev->ifindex; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); @@ -779,19 +589,116 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) } #if IS_ENABLED(CONFIG_IPV6) +/* neighbor handling is done with actual device; do not want + * to flip skb->dev for those ndisc packets. This really fails + * for multiple next protocols (e.g., NEXTHDR_HOP). But it is + * a start. + */ +static bool ipv6_ndisc_frame(const struct sk_buff *skb) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + bool rc = false; + + if (iph->nexthdr == NEXTHDR_ICMP) { + const struct icmp6hdr *icmph; + struct icmp6hdr _icmph; + + icmph = skb_header_pointer(skb, sizeof(*iph), + sizeof(_icmph), &_icmph); + if (!icmph) + goto out; + + switch (icmph->icmp6_type) { + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + rc = true; + break; + } + } + +out: + return rc; +} + +static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, + struct sk_buff *skb) +{ + /* if packet is NDISC keep the ingress interface */ + if (!ipv6_ndisc_frame(skb)) { + skb->dev = vrf_dev; + skb->skb_iif = vrf_dev->ifindex; + + skb_push(skb, skb->mac_len); + dev_queue_xmit_nit(skb, vrf_dev); + skb_pull(skb, skb->mac_len); + + IP6CB(skb)->flags |= IP6SKB_L3SLAVE; + } + + return skb; +} + +#else +static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, + struct sk_buff *skb) +{ + return skb; +} +#endif + +static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, + struct sk_buff *skb) +{ + skb->dev = vrf_dev; + skb->skb_iif = vrf_dev->ifindex; + + skb_push(skb, skb->mac_len); + dev_queue_xmit_nit(skb, vrf_dev); + skb_pull(skb, skb->mac_len); + + return skb; +} + +/* called with rcu lock held */ +static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, + struct sk_buff *skb, + u16 proto) +{ + switch (proto) { + case AF_INET: + return vrf_ip_rcv(vrf_dev, skb); + case AF_INET6: + return vrf_ip6_rcv(vrf_dev, skb); + } + + return skb; +} + +#if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, const struct flowi6 *fl6) { - struct rt6_info *rt = NULL; + struct dst_entry *dst = NULL; if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { struct net_vrf *vrf = netdev_priv(dev); + struct rt6_info *rt; - rt = vrf->rt6; - atomic_inc(&rt->dst.__refcnt); + rcu_read_lock(); + + rt = rcu_dereference(vrf->rt6); + if (likely(rt)) { + dst = &rt->dst; + dst_hold(dst); + } + + rcu_read_unlock(); } - return (struct dst_entry *)rt; + return dst; } #endif @@ -799,6 +706,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_get_saddr = vrf_get_saddr, + .l3mdev_l3_rcv = vrf_l3_rcv, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_get_rt6_dst = vrf_get_rt6_dst, #endif @@ -946,19 +854,6 @@ static int __init vrf_init_module(void) { int rc; - vrf_dst_ops.kmem_cachep = - kmem_cache_create("vrf_ip_dst_cache", - sizeof(struct rtable), 0, - SLAB_HWCACHE_ALIGN, - NULL); - - if (!vrf_dst_ops.kmem_cachep) - return -ENOMEM; - - rc = init_dst_ops6_kmem_cachep(); - if (rc != 0) - goto error2; - register_netdevice_notifier(&vrf_notifier_block); rc = rtnl_link_register(&vrf_link_ops); @@ -969,22 +864,10 @@ static int __init vrf_init_module(void) error: unregister_netdevice_notifier(&vrf_notifier_block); - free_dst_ops6_kmem_cachep(); -error2: - kmem_cache_destroy(vrf_dst_ops.kmem_cachep); return rc; } -static void __exit vrf_cleanup_module(void) -{ - rtnl_link_unregister(&vrf_link_ops); - unregister_netdevice_notifier(&vrf_notifier_block); - kmem_cache_destroy(vrf_dst_ops.kmem_cachep); - free_dst_ops6_kmem_cachep(); -} - module_init(vrf_init_module); -module_exit(vrf_cleanup_module); MODULE_AUTHOR("Shrijeet Mukherjee, David Ahern"); MODULE_DESCRIPTION("Device driver to instantiate VRF domains"); MODULE_LICENSE("GPL"); |