diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-03 07:53:45 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-03 07:53:45 +0400 |
commit | cd6362befe4cc7bf589a5236d2a780af2d47bcc9 (patch) | |
tree | 3bd4e13ec3f92a00dc4f6c3d65e820b54dbfe46e /net/ipv4/ip_vti.c | |
parent | 0f1b1e6d73cb989ce2c071edc57deade3b084dfe (diff) | |
parent | b1586f099ba897542ece36e8a23c1a62907261ef (diff) | |
download | linux-cd6362befe4cc7bf589a5236d2a780af2d47bcc9.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Here is my initial pull request for the networking subsystem during
this merge window:
1) Support for ESN in AH (RFC 4302) from Fan Du.
2) Add full kernel doc for ethtool command structures, from Ben
Hutchings.
3) Add BCM7xxx PHY driver, from Florian Fainelli.
4) Export computed TCP rate information in netlink socket dumps, from
Eric Dumazet.
5) Allow IPSEC SA to be dumped partially using a filter, from Nicolas
Dichtel.
6) Convert many drivers to pci_enable_msix_range(), from Alexander
Gordeev.
7) Record SKB timestamps more efficiently, from Eric Dumazet.
8) Switch to microsecond resolution for TCP round trip times, also
from Eric Dumazet.
9) Clean up and fix 6lowpan fragmentation handling by making use of
the existing inet_frag api for it's implementation.
10) Add TX grant mapping to xen-netback driver, from Zoltan Kiss.
11) Auto size SKB lengths when composing netlink messages based upon
past message sizes used, from Eric Dumazet.
12) qdisc dumps can take a long time, add a cond_resched(), From Eric
Dumazet.
13) Sanitize netpoll core and drivers wrt. SKB handling semantics.
Get rid of never-used-in-tree netpoll RX handling. From Eric W
Biederman.
14) Support inter-address-family and namespace changing in VTI tunnel
driver(s). From Steffen Klassert.
15) Add Altera TSE driver, from Vince Bridgers.
16) Optimizing csum_replace2() so that it doesn't adjust the checksum
by checksumming the entire header, from Eric Dumazet.
17) Expand BPF internal implementation for faster interpreting, more
direct translations into JIT'd code, and much cleaner uses of BPF
filtering in non-socket ocntexts. From Daniel Borkmann and Alexei
Starovoitov"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1976 commits)
netpoll: Use skb_irq_freeable to make zap_completion_queue safe.
net: Add a test to see if a skb is freeable in irq context
qlcnic: Fix build failure due to undefined reference to `vxlan_get_rx_port'
net: ptp: move PTP classifier in its own file
net: sxgbe: make "core_ops" static
net: sxgbe: fix logical vs bitwise operation
net: sxgbe: sxgbe_mdio_register() frees the bus
Call efx_set_channels() before efx->type->dimension_resources()
xen-netback: disable rogue vif in kthread context
net/mlx4: Set proper build dependancy with vxlan
be2net: fix build dependency on VxLAN
mac802154: make csma/cca parameters per-wpan
mac802154: allow only one WPAN to be up at any given time
net: filter: minor: fix kdoc in __sk_run_filter
netlink: don't compare the nul-termination in nla_strcmp
can: c_can: Avoid led toggling for every packet.
can: c_can: Simplify TX interrupt cleanup
can: c_can: Store dlc private
can: c_can: Reduce register access
can: c_can: Make the code readable
...
Diffstat (limited to 'net/ipv4/ip_vti.c')
-rw-r--r-- | net/ipv4/ip_vti.c | 310 |
1 files changed, 244 insertions, 66 deletions
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 48eafae51769..687ddef4e574 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -34,6 +34,7 @@ #include <linux/init.h> #include <linux/netfilter_ipv4.h> #include <linux/if_ether.h> +#include <linux/icmpv6.h> #include <net/sock.h> #include <net/ip.h> @@ -49,8 +50,8 @@ static struct rtnl_link_ops vti_link_ops __read_mostly; static int vti_net_id __read_mostly; static int vti_tunnel_init(struct net_device *dev); -/* We dont digest the packet therefore let the packet pass */ -static int vti_rcv(struct sk_buff *skb) +static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); @@ -60,79 +61,120 @@ static int vti_rcv(struct sk_buff *skb) tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel != NULL) { - struct pcpu_sw_netstats *tstats; - u32 oldmark = skb->mark; - int ret; - - - /* temporarily mark the skb with the tunnel o_key, to - * only match policies with this mark. - */ - skb->mark = be32_to_cpu(tunnel->parms.o_key); - ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); - skb->mark = oldmark; - if (!ret) - return -1; - - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - secpath_reset(skb); - skb->dev = tunnel->dev; + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + skb->mark = be32_to_cpu(tunnel->parms.i_key); + + return xfrm_input(skb, nexthdr, spi, encap_type); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; +} + +static int vti_rcv(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); +} + +static int vti_rcv_cb(struct sk_buff *skb, int err) +{ + unsigned short family; + struct net_device *dev; + struct pcpu_sw_netstats *tstats; + struct xfrm_state *x; + struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; + + if (!tunnel) return 1; + + dev = tunnel->dev; + + if (err) { + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + + return 0; } - return -1; + x = xfrm_input_state(skb); + family = x->inner_mode->afinfo->family; + + if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + return -EPERM; + + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev))); + skb->dev = dev; + + tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + return 0; } -/* This function assumes it is being called from dev_queue_xmit() - * and that skb is filled properly by that function. - */ +static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src) +{ + xfrm_address_t *daddr = (xfrm_address_t *)&dst; + xfrm_address_t *saddr = (xfrm_address_t *)&src; -static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) + /* if there is no transform then this tunnel is not functional. + * Or if the xfrm is not mode tunnel. + */ + if (!x || x->props.mode != XFRM_MODE_TUNNEL || + x->props.family != AF_INET) + return false; + + if (!dst) + return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET); + + if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET)) + return false; + + return true; +} + +static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, + struct flowi *fl) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct iphdr *tiph = &tunnel->parms.iph; - u8 tos; - struct rtable *rt; /* Route to the other host */ + struct ip_tunnel_parm *parms = &tunnel->parms; + struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ - struct iphdr *old_iph = ip_hdr(skb); - __be32 dst = tiph->daddr; - struct flowi4 fl4; int err; - if (skb->protocol != htons(ETH_P_IP)) - goto tx_error; - - tos = old_iph->tos; + if (!dst) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } - memset(&fl4, 0, sizeof(fl4)); - flowi4_init_output(&fl4, tunnel->parms.link, - be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), - RT_SCOPE_UNIVERSE, - IPPROTO_IPIP, 0, - dst, tiph->saddr, 0, 0); - rt = ip_route_output_key(dev_net(dev), &fl4); - if (IS_ERR(rt)) { + dst_hold(dst); + dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0); + if (IS_ERR(dst)) { dev->stats.tx_carrier_errors++; goto tx_error_icmp; } - /* if there is no transform then this tunnel is not functional. - * Or if the xfrm is not mode tunnel. - */ - if (!rt->dst.xfrm || - rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) { + + if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) { dev->stats.tx_carrier_errors++; - ip_rt_put(rt); + dst_release(dst); goto tx_error_icmp; } - tdev = rt->dst.dev; + + tdev = dst->dev; if (tdev == dev) { - ip_rt_put(rt); + dst_release(dst); dev->stats.collisions++; goto tx_error; } @@ -146,10 +188,8 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) tunnel->err_count = 0; } - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - nf_reset(skb); + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); + skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; err = dst_output(skb); @@ -166,6 +206,95 @@ tx_error: return NETDEV_TX_OK; } +/* This function assumes it is being called from dev_queue_xmit() + * and that skb is filled properly by that function. + */ +static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + + skb->mark = be32_to_cpu(tunnel->parms.o_key); + + switch (skb->protocol) { + case htons(ETH_P_IP): + xfrm_decode_session(skb, &fl, AF_INET); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + break; + case htons(ETH_P_IPV6): + xfrm_decode_session(skb, &fl, AF_INET6); + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + break; + default: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + return vti_xmit(skb, dev, &fl); +} + +static int vti4_err(struct sk_buff *skb, u32 info) +{ + __be32 spi; + struct xfrm_state *x; + struct ip_tunnel *tunnel; + struct ip_esp_hdr *esph; + struct ip_auth_hdr *ah ; + struct ip_comp_hdr *ipch; + struct net *net = dev_net(skb->dev); + const struct iphdr *iph = (const struct iphdr *)skb->data; + int protocol = iph->protocol; + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->daddr, iph->saddr, 0); + if (!tunnel) + return -1; + + switch (protocol) { + case IPPROTO_ESP: + esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); + spi = esph->spi; + break; + case IPPROTO_AH: + ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); + spi = ah->spi; + break; + case IPPROTO_COMP: + ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); + spi = htonl(ntohs(ipch->cpi)); + break; + default: + return 0; + } + + switch (icmp_hdr(skb)->type) { + case ICMP_DEST_UNREACH: + if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + return 0; + case ICMP_REDIRECT: + break; + default: + return 0; + } + + x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + spi, protocol, AF_INET); + if (!x) + return 0; + + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0); + else + ipv4_redirect(skb, net, 0, 0, protocol, 0); + xfrm_state_put(x); + + return 0; +} + static int vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -181,12 +310,13 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EINVAL; } + p.i_flags |= VTI_ISVTI; err = ip_tunnel_ioctl(dev, &p, cmd); if (err) return err; if (cmd != SIOCDELTUNNEL) { - p.i_flags |= GRE_KEY | VTI_ISVTI; + p.i_flags |= GRE_KEY; p.o_flags |= GRE_KEY; } @@ -224,7 +354,6 @@ static int vti_tunnel_init(struct net_device *dev) dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; @@ -241,9 +370,28 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; } -static struct xfrm_tunnel_notifier vti_handler __read_mostly = { +static struct xfrm4_protocol vti_esp4_protocol __read_mostly = { .handler = vti_rcv, - .priority = 1, + .input_handler = vti_input, + .cb_handler = vti_rcv_cb, + .err_handler = vti4_err, + .priority = 100, +}; + +static struct xfrm4_protocol vti_ah4_protocol __read_mostly = { + .handler = vti_rcv, + .input_handler = vti_input, + .cb_handler = vti_rcv_cb, + .err_handler = vti4_err, + .priority = 100, +}; + +static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { + .handler = vti_rcv, + .input_handler = vti_input, + .cb_handler = vti_rcv_cb, + .err_handler = vti4_err, + .priority = 100, }; static int __net_init vti_init_net(struct net *net) @@ -287,6 +435,8 @@ static void vti_netlink_parms(struct nlattr *data[], if (!data) return; + parms->i_flags = VTI_ISVTI; + if (data[IFLA_VTI_LINK]) parms->link = nla_get_u32(data[IFLA_VTI_LINK]); @@ -382,10 +532,31 @@ static int __init vti_init(void) err = register_pernet_device(&vti_net_ops); if (err < 0) return err; - err = xfrm4_mode_tunnel_input_register(&vti_handler); + err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP); + if (err < 0) { + unregister_pernet_device(&vti_net_ops); + pr_info("vti init: can't register tunnel\n"); + + return err; + } + + err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH); + if (err < 0) { + xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); + unregister_pernet_device(&vti_net_ops); + pr_info("vti init: can't register tunnel\n"); + + return err; + } + + err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP); if (err < 0) { + xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); + xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); pr_info("vti init: can't register tunnel\n"); + + return err; } err = rtnl_link_register(&vti_link_ops); @@ -395,7 +566,9 @@ static int __init vti_init(void) return err; rtnl_link_failed: - xfrm4_mode_tunnel_input_deregister(&vti_handler); + xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); + xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); + xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); return err; } @@ -403,8 +576,13 @@ rtnl_link_failed: static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); - if (xfrm4_mode_tunnel_input_deregister(&vti_handler)) + if (xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP)) pr_info("vti close: can't deregister tunnel\n"); + if (xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH)) + pr_info("vti close: can't deregister tunnel\n"); + if (xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP)) + pr_info("vti close: can't deregister tunnel\n"); + unregister_pernet_device(&vti_net_ops); } |