diff options
-rw-r--r-- | include/net/ipv6_stubs.h | 6 | ||||
-rw-r--r-- | include/net/xfrm.h | 31 | ||||
-rw-r--r-- | net/ipv4/Kconfig | 1 | ||||
-rw-r--r-- | net/ipv4/udp.c | 10 | ||||
-rw-r--r-- | net/ipv4/xfrm4_input.c | 5 | ||||
-rw-r--r-- | net/ipv4/xfrm4_output.c | 63 | ||||
-rw-r--r-- | net/ipv4/xfrm4_state.c | 24 | ||||
-rw-r--r-- | net/ipv6/Kconfig | 12 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 6 | ||||
-rw-r--r-- | net/ipv6/ah6.c | 1 | ||||
-rw-r--r-- | net/ipv6/esp6.c | 414 | ||||
-rw-r--r-- | net/ipv6/esp6_offload.c | 7 | ||||
-rw-r--r-- | net/ipv6/ip6_vti.c | 18 | ||||
-rw-r--r-- | net/ipv6/ipcomp6.c | 1 | ||||
-rw-r--r-- | net/ipv6/xfrm6_input.c | 111 | ||||
-rw-r--r-- | net/ipv6/xfrm6_output.c | 96 | ||||
-rw-r--r-- | net/ipv6/xfrm6_protocol.c | 48 | ||||
-rw-r--r-- | net/ipv6/xfrm6_state.c | 26 | ||||
-rw-r--r-- | net/xfrm/Kconfig | 3 | ||||
-rw-r--r-- | net/xfrm/Makefile | 2 | ||||
-rw-r--r-- | net/xfrm/espintcp.c | 56 | ||||
-rw-r--r-- | net/xfrm/xfrm_inout.h | 32 | ||||
-rw-r--r-- | net/xfrm/xfrm_input.c | 21 | ||||
-rw-r--r-- | net/xfrm/xfrm_interface.c | 5 | ||||
-rw-r--r-- | net/xfrm/xfrm_output.c | 129 |
25 files changed, 836 insertions, 292 deletions
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 6e622dd3122e..d7a7f7c81e7b 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -56,6 +56,12 @@ struct ipv6_stub { void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt); +#if IS_ENABLED(CONFIG_XFRM) + void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu); + int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); + int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); +#endif struct neigh_table *nd_tbl; }; extern const struct ipv6_stub *ipv6_stub __read_mostly; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 8f71c111e65a..094fe682f5d7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -361,11 +361,6 @@ struct xfrm_state_afinfo { const struct xfrm_type *type_dstopts; int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); - int (*output_finish)(struct sock *sk, struct sk_buff *skb); - int (*extract_input)(struct xfrm_state *x, - struct sk_buff *skb); - int (*extract_output)(struct xfrm_state *x, - struct sk_buff *skb); int (*transport_finish)(struct sk_buff *skb, int async); void (*local_error)(struct sk_buff *skb, u32 mtu); @@ -1406,6 +1401,8 @@ struct xfrm4_protocol { struct xfrm6_protocol { int (*handler)(struct sk_buff *skb); + int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); int (*cb_handler)(struct sk_buff *skb, int err); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); @@ -1562,7 +1559,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); #endif void xfrm_local_error(struct sk_buff *skb, int mtu); -int xfrm4_extract_header(struct sk_buff *skb); int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); @@ -1578,7 +1574,6 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) return xfrm_input(skb, nexthdr, spi, 0); } -int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); @@ -1586,10 +1581,11 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_extract_header(struct sk_buff *skb); int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t); +int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); int xfrm6_transport_finish(struct sk_buff *skb, int async); int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t); int xfrm6_rcv(struct sk_buff *skb); @@ -1602,14 +1598,15 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family); __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); -int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); #ifdef CONFIG_XFRM +void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen); #else @@ -1992,4 +1989,20 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x, return 0; } + +#if IS_ENABLED(CONFIG_IPV6) +static inline bool xfrm6_local_dontfrag(const struct sock *sk) +{ + int proto; + + if (!sk || sk->sk_family != AF_INET6) + return false; + + proto = sk->sk_protocol; + if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) + return inet6_sk(sk)->dontfrag; + + return false; +} +#endif #endif /* _NET_XFRM_H */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5da4733067fb..23ba5045e3d3 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -384,6 +384,7 @@ config INET_ESPINTCP depends on XFRM && INET_ESP select STREAM_PARSER select NET_SOCK_MSG + select XFRM_ESPINTCP help Support for RFC 8229 encapsulation of ESP and IKE over TCP/IPv4 sockets. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32564b350823..1b7ebbcae497 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -112,6 +112,9 @@ #include <net/sock_reuseport.h> #include <net/addrconf.h> #include <net/udp_tunnel.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6_stubs.h> +#endif struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ -2563,7 +2566,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP_NON_IKE: - up->encap_rcv = xfrm4_udp_encap_rcv; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; + else +#endif + up->encap_rcv = xfrm4_udp_encap_rcv; #endif fallthrough; case UDP_ENCAP_L2TPINUDP: diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index f8de2482a529..ad2afeef4f10 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -18,11 +18,6 @@ #include <net/ip.h> #include <net/xfrm.h> -int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return xfrm4_extract_header(skb); -} - static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) { diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 30ddb9dc9398..3cff51ba72bb 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -14,77 +14,18 @@ #include <net/xfrm.h> #include <net/icmp.h> -static int xfrm4_tunnel_check_size(struct sk_buff *skb) -{ - int mtu, ret = 0; - - if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) - goto out; - - if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) - goto out; - - mtu = dst_mtu(skb_dst(skb)); - if ((!skb_is_gso(skb) && skb->len > mtu) || - (skb_is_gso(skb) && - !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { - skb->protocol = htons(ETH_P_IP); - - if (skb->sk) - xfrm_local_error(skb, mtu); - else - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, htonl(mtu)); - ret = -EMSGSIZE; - } -out: - return ret; -} - -int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm4_tunnel_check_size(skb); - if (err) - return err; - - XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol; - - return xfrm4_extract_header(skb); -} - -int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) -{ - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - - IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; - - return xfrm_output(sk, skb); -} - static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { +#ifdef CONFIG_NETFILTER struct xfrm_state *x = skb_dst(skb)->xfrm; - const struct xfrm_state_afinfo *afinfo; - int ret = -EAFNOSUPPORT; -#ifdef CONFIG_NETFILTER if (!x) { IPCB(skb)->flags |= IPSKB_REROUTED; return dst_output(net, sk, skb); } #endif - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - ret = afinfo->output_finish(sk, skb); - else - kfree_skb(skb); - rcu_read_unlock(); - - return ret; + return xfrm_output(sk, skb); } int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index f8ed3c3bb928..87d4db591488 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -8,36 +8,12 @@ * */ -#include <net/ip.h> #include <net/xfrm.h> -#include <linux/pfkeyv2.h> -#include <linux/ipsec.h> -#include <linux/netfilter_ipv4.h> -#include <linux/export.h> - -int xfrm4_extract_header(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); - XFRM_MODE_SKB_CB(skb)->id = iph->id; - XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; - XFRM_MODE_SKB_CB(skb)->tos = iph->tos; - XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; - XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); - memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, - sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); - - return 0; -} static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .proto = IPPROTO_IPIP, .output = xfrm4_output, - .output_finish = xfrm4_output_finish, - .extract_input = xfrm4_extract_input, - .extract_output = xfrm4_extract_output, .transport_finish = xfrm4_transport_finish, .local_error = xfrm4_local_error, }; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 5a6111da26c4..4f03aece2980 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -88,6 +88,18 @@ config INET6_ESP_OFFLOAD If unsure, say N. +config INET6_ESPINTCP + bool "IPv6: ESP in TCP encapsulation (RFC 8229)" + depends on XFRM && INET6_ESP + select STREAM_PARSER + select NET_SOCK_MSG + select XFRM_ESPINTCP + help + Support for RFC 8229 encapsulation of ESP and IKE over + TCP/IPv6 sockets. + + If unsure, say N. + config INET6_IPCOMP tristate "IPv6: IPComp transformation" select INET6_XFRM_TUNNEL diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0625a97a8894..b304b882e031 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -61,6 +61,7 @@ #include <net/seg6.h> #include <net/rpl.h> #include <net/compat.h> +#include <net/xfrm.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -1023,6 +1024,11 @@ static const struct ipv6_stub ipv6_stub_impl = { .ip6_del_rt = ip6_del_rt, .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, +#if IS_ENABLED(CONFIG_XFRM) + .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, + .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, + .xfrm6_rcv_encap = xfrm6_rcv_encap, +#endif .nd_tbl = &nd_tbl, }; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 45e2adc56610..d88d97617f7e 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -767,6 +767,7 @@ static const struct xfrm_type ah6_type = { static struct xfrm6_protocol ah6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = ah6_rcv_cb, .err_handler = ah6_err, .priority = 0, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 11143d039f16..c43592771126 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -26,11 +26,16 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <net/ip6_checksum.h> #include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> +#include <net/udp.h> #include <linux/icmpv6.h> +#include <net/tcp.h> +#include <net/espintcp.h> +#include <net/inet6_hashtables.h> #include <linux/highmem.h> @@ -39,6 +44,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) /* @@ -72,9 +82,9 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) @@ -104,16 +114,17 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, static void esp_ssg_unref(struct xfrm_state *x, void *tmp) { + struct esp_output_extra *extra = esp_tmp_extra(tmp); struct crypto_aead *aead = x->data; - int seqhilen = 0; + int extralen = 0; u8 *iv; struct aead_request *req; struct scatterlist *sg; if (x->props.flags & XFRM_STATE_ESN) - seqhilen += sizeof(__be32); + extralen += sizeof(*extra); - iv = esp_tmp_iv(aead, tmp, seqhilen); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); /* Unref skb_frag_pages in the src scatterlist if necessary. @@ -124,6 +135,149 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) put_page(sg_page(sg)); } +#ifdef CONFIG_INET6_ESPINTCP +struct esp_tcp_sk { + struct sock *sk; + struct rcu_head rcu; +}; + +static void esp_free_tcp_sk(struct rcu_head *head) +{ + struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); + + sock_put(esk->sk); + kfree(esk); +} + +static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct esp_tcp_sk *esk; + __be16 sport, dport; + struct sock *nsk; + struct sock *sk; + + sk = rcu_dereference(x->encap_sk); + if (sk && sk->sk_state == TCP_ESTABLISHED) + return sk; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (sk && sk == nsk) { + esk = kmalloc(sizeof(*esk), GFP_ATOMIC); + if (!esk) { + spin_unlock_bh(&x->lock); + return ERR_PTR(-ENOMEM); + } + RCU_INIT_POINTER(x->encap_sk, NULL); + esk->sk = sk; + call_rcu(&esk->rcu, esp_free_tcp_sk); + } + spin_unlock_bh(&x->lock); + + sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6, + dport, &x->props.saddr.in6, ntohs(sport), 0, 0); + if (!sk) + return ERR_PTR(-ENOENT); + + if (!tcp_is_ulp_esp(sk)) { + sock_put(sk); + return ERR_PTR(-EINVAL); + } + + spin_lock_bh(&x->lock); + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (encap->encap_sport != sport || + encap->encap_dport != dport) { + sock_put(sk); + sk = nsk ?: ERR_PTR(-EREMCHG); + } else if (sk == nsk) { + sock_put(sk); + } else { + rcu_assign_pointer(x->encap_sk, sk); + } + spin_unlock_bh(&x->lock); + + return sk; +} + +static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) +{ + struct sock *sk; + int err; + + rcu_read_lock(); + + sk = esp6_find_tcp_sk(x); + err = PTR_ERR_OR_ZERO(sk); + if (err) + goto out; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) + err = espintcp_queue_out(sk, skb); + else + err = espintcp_push_skb(sk, skb); + bh_unlock_sock(sk); + +out: + rcu_read_unlock(); + return err; +} + +static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + + return esp_output_tcp_finish(x, skb); +} + +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + local_bh_disable(); + err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb); + local_bh_enable(); + + /* EINPROGRESS just happens to do the right thing. It + * actually means that the skb has been consumed and + * isn't coming back. + */ + return err ?: -EINPROGRESS; +} +#else +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + kfree_skb(skb); + + return -EOPNOTSUPP; +} +#endif + +static void esp_output_encap_csum(struct sk_buff *skb) +{ + /* UDP encap with IPv6 requires a valid checksum */ + if (*skb_mac_header(skb) == IPPROTO_UDP) { + struct udphdr *uh = udp_hdr(skb); + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int len = ntohs(uh->len); + unsigned int offset = skb_transport_offset(skb); + __wsum csum = skb_checksum(skb, offset, skb->len - offset, 0); + + uh->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } +} + static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -143,6 +297,8 @@ static void esp_output_done(struct crypto_async_request *base, int err) esp_ssg_unref(x, tmp); kfree(tmp); + esp_output_encap_csum(skb); + if (xo && (xo->flags & XFRM_DEV_RESUME)) { if (err) { XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); @@ -154,7 +310,11 @@ static void esp_output_done(struct crypto_async_request *base, int err) secpath_reset(skb); xfrm_dev_resume(skb); } else { - xfrm_output_resume(skb, err); + if (!err && + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + esp_output_tail_tcp(x, skb); + else + xfrm_output_resume(skb, err); } } @@ -163,7 +323,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -171,27 +331,36 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb, struct xfrm_state *x, struct ip_esp_hdr *esph, - __be32 *seqhi) + struct esp_output_extra *extra) { /* For ESN we move the header forward by 4 bytes to * accomodate the high bits. We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { + __u32 seqhi; struct xfrm_offload *xo = xfrm_offload(skb); - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; if (xo) - esph->seq_no = htonl(xo->seq.hi); + seqhi = xo->seq.hi; else - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + seqhi = XFRM_SKB_CB(skb)->seq.output.hi; + + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; + esph->seq_no = htonl(seqhi); } esph->spi = x->id.spi; @@ -207,15 +376,122 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } +static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, + int encap_type, + struct esp_info *esp, + __be16 sport, + __be16 dport) +{ + struct udphdr *uh; + __be32 *udpdata32; + unsigned int len; + + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len > U16_MAX) + return ERR_PTR(-EMSGSIZE); + + uh = (struct udphdr *)esp->esph; + uh->source = sport; + uh->dest = dport; + uh->len = htons(len); + uh->check = 0; + + *skb_mac_header(skb) = IPPROTO_UDP; + + if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) { + udpdata32 = (__be32 *)(uh + 1); + udpdata32[0] = udpdata32[1] = 0; + return (struct ip_esp_hdr *)(udpdata32 + 2); + } + + return (struct ip_esp_hdr *)(uh + 1); +} + +#ifdef CONFIG_INET6_ESPINTCP +static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + __be16 *lenp = (void *)esp->esph; + struct ip_esp_hdr *esph; + unsigned int len; + struct sock *sk; + + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len > IP_MAX_MTU) + return ERR_PTR(-EMSGSIZE); + + rcu_read_lock(); + sk = esp6_find_tcp_sk(x); + rcu_read_unlock(); + + if (IS_ERR(sk)) + return ERR_CAST(sk); + + *lenp = htons(len); + esph = (struct ip_esp_hdr *)(lenp + 1); + + return esph; +} +#else +static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif + +static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, + struct esp_info *esp) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct ip_esp_hdr *esph; + __be16 sport, dport; + int encap_type; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + encap_type = encap->encap_type; + spin_unlock_bh(&x->lock); + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: + esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport); + break; + case TCP_ENCAP_ESPINTCP: + esph = esp6_output_tcp_encap(x, skb, esp); + break; + } + + if (IS_ERR(esph)) + return PTR_ERR(esph); + + esp->esph = esph; + + return 0; +} + int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; u8 *vaddr; int nfrags; + int esph_offset; struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + if (x->encap) { + int err = esp6_output_encap(x, skb, esp); + + if (err < 0) + return err; + } + if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; @@ -274,10 +550,13 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info } cow: + esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb); + nfrags = skb_cow_data(skb, tailen, &trailer); if (nfrags < 0) goto out; tail = skb_tail_pointer(trailer); + esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset); skip_cow: esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); @@ -295,20 +574,20 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info void *tmp; int ivlen; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; struct page *page; struct ip_esp_hdr *esph; struct aead_request *req; struct crypto_aead *aead; struct scatterlist *sg, *dsg; + struct esp_output_extra *extra; int err = -ENOMEM; assoclen = sizeof(struct ip_esp_hdr); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); + extralen += sizeof(*extra); assoclen += sizeof(__be32); } @@ -316,12 +595,12 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info alen = crypto_aead_authsize(aead); ivlen = crypto_aead_ivsize(aead); - tmp = esp_alloc_tmp(aead, esp->nfrags + 2, seqhilen); + tmp = esp_alloc_tmp(aead, esp->nfrags + 2, extralen); if (!tmp) goto error; - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -330,7 +609,8 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info else dsg = &sg[esp->nfrags]; - esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi); + esph = esp_output_set_esn(skb, x, esp->esph, extra); + esp->esph = esph; sg_init_table(sg, esp->nfrags); err = skb_to_sgvec(skb, sg, @@ -394,11 +674,15 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info case 0: if ((x->props.flags & XFRM_STATE_ESN)) esp_output_restore_header(skb); + esp_output_encap_csum(skb); } if (sg != dsg) esp_ssg_unref(x, tmp); + if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + err = esp_output_tail_tcp(x, skb); + error_free: kfree(tmp); error: @@ -438,11 +722,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esp.plen = esp.clen - skb->len - esp.tfclen; esp.tailen = esp.tfclen + esp.plen + alen; + esp.esph = ip_esp_hdr(skb); + esp.nfrags = esp6_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; - esph = ip_esp_hdr(skb); + esph = esp.esph; esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); @@ -517,6 +803,60 @@ int esp6_input_done2(struct sk_buff *skb, int err) if (unlikely(err < 0)) goto out; + if (x->encap) { + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct xfrm_encap_tmpl *encap = x->encap; + struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len); + struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len); + __be16 source; + + switch (x->encap->encap_type) { + case TCP_ENCAP_ESPINTCP: + source = th->source; + break; + case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: + source = uh->source; + break; + default: + WARN_ON_ONCE(1); + err = -EINVAL; + goto out; + } + + /* + * 1) if the NAT-T peer's IP or port changed then + * advertize the change to the keying daemon. + * This is an inbound SA, so just compare + * SRC ports. + */ + if (!ipv6_addr_equal(&ip6h->saddr, &x->props.saddr.in6) || + source != encap->encap_sport) { + xfrm_address_t ipaddr; + + memcpy(&ipaddr.a6, &ip6h->saddr.s6_addr, sizeof(ipaddr.a6)); + km_new_mapping(x, &ipaddr, source); + + /* XXX: perhaps add an extra + * policy check here, to see + * if we should allow or + * reject a packet from a + * different source + * address/port. + */ + } + + /* + * 2) ignore UDP/TCP checksums in case + * of NAT-T in Transport Mode, or + * perform other post-processing fixes + * as per draft-ietf-ipsec-udp-encaps-06, + * section 3.1.2 + */ + if (x->props.mode == XFRM_MODE_TRANSPORT) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); skb_pull_rcsum(skb, hlen); @@ -632,7 +972,7 @@ skip_cow: goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -836,9 +1176,6 @@ static int esp6_init_state(struct xfrm_state *x) u32 align; int err; - if (x->encap) - return -EINVAL; - x->data = NULL; if (x->aead) @@ -867,6 +1204,30 @@ static int esp6_init_state(struct xfrm_state *x) break; } + if (x->encap) { + struct xfrm_encap_tmpl *encap = x->encap; + + switch (encap->encap_type) { + default: + err = -EINVAL; + goto error; + case UDP_ENCAP_ESPINUDP: + x->props.header_len += sizeof(struct udphdr); + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); + break; +#ifdef CONFIG_INET6_ESPINTCP + case TCP_ENCAP_ESPINTCP: + /* only the length field, TCP encap is done by + * the socket + */ + x->props.header_len += 2; + break; +#endif + } + } + align = ALIGN(crypto_aead_blocksize(aead), 4); x->props.trailer_len = align + 1 + crypto_aead_authsize(aead); @@ -893,6 +1254,7 @@ static const struct xfrm_type esp6_type = { static struct xfrm6_protocol esp6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = esp6_rcv_cb, .err_handler = esp6_err, .priority = 0, diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 8eab2c869d61..06163cc15844 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -271,7 +271,6 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features int alen; int blksize; struct xfrm_offload *xo; - struct ip_esp_hdr *esph; struct crypto_aead *aead; struct esp_info esp; bool hw_offload = true; @@ -312,13 +311,13 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features seq = xo->seq.low; - esph = ip_esp_hdr(skb); - esph->spi = x->id.spi; + esp.esph = ip_esp_hdr(skb); + esp.esph->spi = x->id.spi; skb_push(skb, -skb_network_offset(skb)); if (xo->flags & XFRM_GSO_SEGMENT) { - esph->seq_no = htonl(seq); + esp.esph->seq_no = htonl(seq); if (!skb_is_gso(skb)) xo->seq.low++; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index cc6180e08a4f..1147f647b9a0 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -296,7 +296,8 @@ static void vti6_dev_uninit(struct net_device *dev) dev_put(dev); } -static int vti6_rcv(struct sk_buff *skb) +static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -323,7 +324,10 @@ static int vti6_rcv(struct sk_buff *skb) rcu_read_unlock(); - return xfrm6_rcv_tnl(skb, t); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + return xfrm_input(skb, nexthdr, spi, encap_type); } rcu_read_unlock(); return -EINVAL; @@ -332,6 +336,13 @@ discard: return 0; } +static int vti6_rcv(struct sk_buff *skb) +{ + int nexthdr = skb_network_header(skb)[IP6CB(skb)->nhoff]; + + return vti6_input_proto(skb, nexthdr, 0, 0); +} + static int vti6_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -1185,6 +1196,7 @@ static struct pernet_operations vti6_net_ops = { static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, @@ -1192,6 +1204,7 @@ static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, @@ -1199,6 +1212,7 @@ static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 3752bd3e92ce..99668bfebd85 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -183,6 +183,7 @@ static const struct xfrm_type ipcomp6_type = { static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, .priority = 0, diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a52cb3fc6df5..04cbeefd8982 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -17,11 +17,6 @@ #include <net/ipv6.h> #include <net/xfrm.h> -int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return xfrm6_extract_header(skb); -} - int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t) { @@ -35,9 +30,12 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); static int xfrm6_transport_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) { - if (xfrm_trans_queue(skb, ip6_rcv_finish)) - __kfree_skb(skb); - return -1; + if (xfrm_trans_queue(skb, ip6_rcv_finish)) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return 0; } int xfrm6_transport_finish(struct sk_buff *skb, int async) @@ -60,13 +58,106 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); skb_reset_transport_header(skb); - return -1; + return 0; } NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm6_transport_finish2); - return -1; + return 0; +} + +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. + * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_sock *up = udp_sk(sk); + struct udphdr *uh; + struct ipv6hdr *ip6h; + int len; + int ip6hlen = sizeof(struct ipv6hdr); + + __u8 *udpdata; + __be32 *udpdata32; + __u16 encap_type = up->encap_type; + + /* if this is not encapsulated socket, then just return now */ + if (!encap_type) + return 1; + + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + len = skb->len - sizeof(struct udphdr); + if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) + return 1; + + /* Now we can get the pointers */ + uh = udp_hdr(skb); + udpdata = (__u8 *)uh + sizeof(struct udphdr); + udpdata32 = (__be32 *)udpdata; + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { + /* ESP Packet without Non-ESP header */ + len = sizeof(struct udphdr); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && + udpdata32[0] == 0 && udpdata32[1] == 0) { + + /* ESP Packet with Non-IKE marker */ + len = sizeof(struct udphdr) + 2 * sizeof(u32); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + } + + /* At this point we are sure that this is an ESPinUDP packet, + * so we need to remove 'len' bytes from the packet (the UDP + * header and optional ESP marker bytes) and then modify the + * protocol to ESP, and then call into the transform receiver. + */ + if (skb_unclone(skb, GFP_ATOMIC)) + goto drop; + + /* Now we can update and verify the packet length... */ + ip6h = ipv6_hdr(skb); + ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len); + if (skb->len < ip6hlen + len) { + /* packet is too small!?! */ + goto drop; + } + + /* pull the data buffer up to the ESP header and set the + * transport header to point to ESP. Keep UDP on the stack + * for later. + */ + __skb_pull(skb, len); + skb_reset_transport_header(skb); + + /* process ESP */ + return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); + +drop: + kfree_skb(skb); + return 0; } int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index e34167f790e6..8b84d534b19d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -23,24 +23,7 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, } EXPORT_SYMBOL(xfrm6_find_1stfragopt); -static int xfrm6_local_dontfrag(struct sk_buff *skb) -{ - int proto; - struct sock *sk = skb->sk; - - if (sk) { - if (sk->sk_family != AF_INET6) - return 0; - - proto = sk->sk_protocol; - if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) - return inet6_sk(sk)->dontfrag; - } - - return 0; -} - -static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) +void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; struct sock *sk = skb->sk; @@ -64,80 +47,9 @@ void xfrm6_local_error(struct sk_buff *skb, u32 mtu) ipv6_local_error(sk, EMSGSIZE, &fl6, mtu); } -static int xfrm6_tunnel_check_size(struct sk_buff *skb) -{ - int mtu, ret = 0; - struct dst_entry *dst = skb_dst(skb); - - if (skb->ignore_df) - goto out; - - mtu = dst_mtu(dst); - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - - if ((!skb_is_gso(skb) && skb->len > mtu) || - (skb_is_gso(skb) && - !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { - skb->dev = dst->dev; - skb->protocol = htons(ETH_P_IPV6); - - if (xfrm6_local_dontfrag(skb)) - xfrm6_local_rxpmtu(skb, mtu); - else if (skb->sk) - xfrm_local_error(skb, mtu); - else - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ret = -EMSGSIZE; - } -out: - return ret; -} - -int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm6_tunnel_check_size(skb); - if (err) - return err; - - XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; - - return xfrm6_extract_header(skb); -} - -int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) -{ - memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - - IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; - - return xfrm_output(sk, skb); -} - -static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk, - struct sk_buff *skb) -{ - const struct xfrm_state_afinfo *afinfo; - int ret = -EAFNOSUPPORT; - - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - ret = afinfo->output_finish(sk, skb); - else - kfree_skb(skb); - rcu_read_unlock(); - - return ret; -} - static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct xfrm_state *x = skb_dst(skb)->xfrm; - - return __xfrm6_output_state_finish(x, sk, skb); + return xfrm_output(sk, skb); } static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -164,7 +76,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) toobig = skb->len > mtu && !skb_is_gso(skb); - if (toobig && xfrm6_local_dontfrag(skb)) { + if (toobig && xfrm6_local_dontfrag(skb->sk)) { xfrm6_local_rxpmtu(skb, mtu); kfree_skb(skb); return -EMSGSIZE; @@ -179,7 +91,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) __xfrm6_output_finish); skip_frag: - return __xfrm6_output_state_finish(x, sk, skb); + return xfrm_output(sk, skb); } int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 34cb65c7d5a7..ea2f805d3b01 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -14,6 +14,7 @@ #include <linux/mutex.h> #include <linux/skbuff.h> #include <linux/icmpv6.h> +#include <net/ip6_route.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/xfrm.h> @@ -58,6 +59,53 @@ static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) return 0; } +int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + int ret; + struct xfrm6_protocol *handler; + struct xfrm6_protocol __rcu **head = proto_handlers(nexthdr); + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + + if (!head) + goto out; + + if (!skb_dst(skb)) { + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct dst_entry *dst; + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, + skb, flags); + if (dst->error) + goto drop; + skb_dst_set(skb, dst); + } + + for_each_protocol_rcu(*head, handler) + if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) + return ret; + +out: + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL(xfrm6_rcv_encap); + static int xfrm6_esp_rcv(struct sk_buff *skb) { int ret; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 78daadecbdef..6610b2198fa9 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -13,37 +13,11 @@ */ #include <net/xfrm.h> -#include <linux/pfkeyv2.h> -#include <linux/ipsec.h> -#include <linux/netfilter_ipv6.h> -#include <linux/export.h> -#include <net/dsfield.h> -#include <net/ipv6.h> -#include <net/addrconf.h> - -int xfrm6_extract_header(struct sk_buff *skb) -{ - struct ipv6hdr *iph = ipv6_hdr(skb); - - XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); - XFRM_MODE_SKB_CB(skb)->id = 0; - XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); - XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); - XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; - XFRM_MODE_SKB_CB(skb)->optlen = 0; - memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, - sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); - - return 0; -} static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .proto = IPPROTO_IPV6, .output = xfrm6_output, - .output_finish = xfrm6_output_finish, - .extract_input = xfrm6_extract_input, - .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, .local_error = xfrm6_local_error, }; diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 6921a18201a0..b7fd9c838416 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -99,4 +99,7 @@ config NET_KEY_MIGRATE If unsure, say N. +config XFRM_ESPINTCP + bool + endif # INET diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 212a4fcb4a88..2d4bb4b9f75e 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -11,4 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o -obj-$(CONFIG_INET_ESPINTCP) += espintcp.o +obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 037ea156d2f9..2132a3b6df0f 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -6,6 +6,9 @@ #include <net/espintcp.h> #include <linux/skmsg.h> #include <net/inet_common.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6_stubs.h> +#endif static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, struct sock *sk) @@ -31,7 +34,12 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk) rcu_read_lock(); skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); local_bh_disable(); - xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); + else +#endif + xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); local_bh_enable(); rcu_read_unlock(); } @@ -347,6 +355,9 @@ unlock: static struct proto espintcp_prot __ro_after_init; static struct proto_ops espintcp_ops __ro_after_init; +static struct proto espintcp6_prot; +static struct proto_ops espintcp6_ops; +static DEFINE_MUTEX(tcpv6_prot_mutex); static void espintcp_data_ready(struct sock *sk) { @@ -384,10 +395,14 @@ static void espintcp_destruct(struct sock *sk) bool tcp_is_ulp_esp(struct sock *sk) { - return sk->sk_prot == &espintcp_prot; + return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot; } EXPORT_SYMBOL_GPL(tcp_is_ulp_esp); +static void build_protos(struct proto *espintcp_prot, + struct proto_ops *espintcp_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops); static int espintcp_init_sk(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -415,8 +430,19 @@ static int espintcp_init_sk(struct sock *sk) strp_check_rcv(&ctx->strp); skb_queue_head_init(&ctx->ike_queue); skb_queue_head_init(&ctx->out_queue); - sk->sk_prot = &espintcp_prot; - sk->sk_socket->ops = &espintcp_ops; + + if (sk->sk_family == AF_INET) { + sk->sk_prot = &espintcp_prot; + sk->sk_socket->ops = &espintcp_ops; + } else { + mutex_lock(&tcpv6_prot_mutex); + if (!espintcp6_prot.recvmsg) + build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops); + mutex_unlock(&tcpv6_prot_mutex); + + sk->sk_prot = &espintcp6_prot; + sk->sk_socket->ops = &espintcp6_ops; + } ctx->saved_data_ready = sk->sk_data_ready; ctx->saved_write_space = sk->sk_write_space; sk->sk_data_ready = espintcp_data_ready; @@ -489,6 +515,20 @@ static __poll_t espintcp_poll(struct file *file, struct socket *sock, return mask; } +static void build_protos(struct proto *espintcp_prot, + struct proto_ops *espintcp_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops) +{ + memcpy(espintcp_prot, orig_prot, sizeof(struct proto)); + memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops)); + espintcp_prot->sendmsg = espintcp_sendmsg; + espintcp_prot->recvmsg = espintcp_recvmsg; + espintcp_prot->close = espintcp_close; + espintcp_prot->release_cb = espintcp_release; + espintcp_ops->poll = espintcp_poll; +} + static struct tcp_ulp_ops espintcp_ulp __read_mostly = { .name = "espintcp", .owner = THIS_MODULE, @@ -497,13 +537,7 @@ static struct tcp_ulp_ops espintcp_ulp __read_mostly = { void __init espintcp_init(void) { - memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot)); - memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops)); - espintcp_prot.sendmsg = espintcp_sendmsg; - espintcp_prot.recvmsg = espintcp_recvmsg; - espintcp_prot.close = espintcp_close; - espintcp_prot.release_cb = espintcp_release; - espintcp_ops.poll = espintcp_poll; + build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops); tcp_register_ulp(&espintcp_ulp); } diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h index c7b0318938e2..efc5e6b2e87b 100644 --- a/net/xfrm/xfrm_inout.h +++ b/net/xfrm/xfrm_inout.h @@ -6,6 +6,38 @@ #ifndef XFRM_INOUT_H #define XFRM_INOUT_H 1 +static inline void xfrm4_extract_header(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); + XFRM_MODE_SKB_CB(skb)->id = iph->id; + XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; + XFRM_MODE_SKB_CB(skb)->tos = iph->tos; + XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; + XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); + memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); +} + +static inline void xfrm6_extract_header(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *iph = ipv6_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); + XFRM_MODE_SKB_CB(skb)->id = 0; + XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); + XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); + XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + XFRM_MODE_SKB_CB(skb)->optlen = 0; + memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); +#else + WARN_ON_ONCE(1); +#endif +} + static inline void xfrm6_beet_make_header(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index aa35f23c4912..6db266a0cb2d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -353,17 +353,18 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { const struct xfrm_mode *inner_mode = &x->inner_mode; - const struct xfrm_state_afinfo *afinfo; - int err = -EAFNOSUPPORT; - - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - err = afinfo->extract_input(x, skb); - rcu_read_unlock(); - if (err) - return err; + switch (x->outer_mode.family) { + case AF_INET: + xfrm4_extract_header(skb); + break; + case AF_INET6: + xfrm6_extract_header(skb); + break; + default: + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; + } if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 3361e3ac5714..02f8f46d0cc5 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -145,7 +145,6 @@ static int xfrmi_create(struct net_device *dev) if (err < 0) goto out; - dev_hold(dev); xfrmi_link(xfrmn, xi); return 0; @@ -175,7 +174,6 @@ static void xfrmi_dev_uninit(struct net_device *dev) struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); xfrmi_unlink(xfrmn, xi); - dev_put(dev); } static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) @@ -757,6 +755,7 @@ static struct pernet_operations xfrmi_net_ops = { static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -764,6 +763,7 @@ static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -771,6 +771,7 @@ static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = { static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 2fd3d990d992..9c43b8dd80fb 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -13,9 +13,15 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> +#include <net/icmp.h> #include <net/inet_ecn.h> #include <net/xfrm.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_route.h> +#include <net/ipv6_stubs.h> +#endif + #include "xfrm_inout.h" static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -565,6 +571,22 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) struct xfrm_state *x = skb_dst(skb)->xfrm; int err; + switch (x->outer_mode.family) { + case AF_INET: + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); +#ifdef CONFIG_NETFILTER + IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; +#endif + break; + case AF_INET6: + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + +#ifdef CONFIG_NETFILTER + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; +#endif + break; + } + secpath_reset(skb); if (xfrm_dev_offload_ok(skb, x)) { @@ -609,11 +631,101 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output); +static int xfrm4_tunnel_check_size(struct sk_buff *skb) +{ + int mtu, ret = 0; + + if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) + goto out; + + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) + goto out; + + mtu = dst_mtu(skb_dst(skb)); + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { + skb->protocol = htons(ETH_P_IP); + + if (skb->sk) + xfrm_local_error(skb, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + ret = -EMSGSIZE; + } +out: + return ret; +} + +static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + err = xfrm4_tunnel_check_size(skb); + if (err) + return err; + + XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol; + + xfrm4_extract_header(skb); + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int xfrm6_tunnel_check_size(struct sk_buff *skb) +{ + int mtu, ret = 0; + struct dst_entry *dst = skb_dst(skb); + + if (skb->ignore_df) + goto out; + + mtu = dst_mtu(dst); + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { + skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); + + if (xfrm6_local_dontfrag(skb->sk)) + ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); + else if (skb->sk) + xfrm_local_error(skb, mtu); + else + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ret = -EMSGSIZE; + } +out: + return ret; +} +#endif + +static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + int err; + + err = xfrm6_tunnel_check_size(skb); + if (err) + return err; + + XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; + + xfrm6_extract_header(skb); + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_state_afinfo *afinfo; const struct xfrm_mode *inner_mode; - int err = -EAFNOSUPPORT; if (x->sel.family == AF_UNSPEC) inner_mode = xfrm_ip2inner_mode(x, @@ -624,13 +736,14 @@ static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) if (inner_mode == NULL) return -EAFNOSUPPORT; - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); - if (likely(afinfo)) - err = afinfo->extract_output(x, skb); - rcu_read_unlock(); + switch (inner_mode->family) { + case AF_INET: + return xfrm4_extract_output(x, skb); + case AF_INET6: + return xfrm6_extract_output(x, skb); + } - return err; + return -EAFNOSUPPORT; } void xfrm_local_error(struct sk_buff *skb, int mtu) |