diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrconf.c | 2 | ||||
-rw-r--r-- | net/ipv6/anycast.c | 6 | ||||
-rw-r--r-- | net/ipv6/datagram.c | 10 | ||||
-rw-r--r-- | net/ipv6/fou6.c | 74 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 4 | ||||
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 14 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 18 | ||||
-rw-r--r-- | net/ipv6/ip6_input.c | 63 | ||||
-rw-r--r-- | net/ipv6/ip6_offload.c | 13 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 45 | ||||
-rw-r--r-- | net/ipv6/ip6_udp_tunnel.c | 16 | ||||
-rw-r--r-- | net/ipv6/ip6mr.c | 2 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 2 | ||||
-rw-r--r-- | net/ipv6/raw.c | 5 | ||||
-rw-r--r-- | net/ipv6/route.c | 5 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 13 | ||||
-rw-r--r-- | net/ipv6/tunnel6.c | 12 | ||||
-rw-r--r-- | net/ipv6/udp.c | 241 | ||||
-rw-r--r-- | net/ipv6/udp_impl.h | 4 | ||||
-rw-r--r-- | net/ipv6/udp_offload.c | 6 | ||||
-rw-r--r-- | net/ipv6/udplite.c | 5 | ||||
-rw-r--r-- | net/ipv6/xfrm6_protocol.c | 18 |
22 files changed, 436 insertions, 142 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 045597b9a7c0..521e471f1cf9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2820,7 +2820,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) dev = __dev_get_by_name(net, p.name); if (!dev) goto err_exit; - err = dev_open(dev); + err = dev_open(dev, NULL); } } #endif diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 94999058e110..cca3b3603c42 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -433,7 +433,6 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { - unsigned int hash = inet6_acaddr_hash(net, addr); struct net_device *nh_dev; struct ifacaddr6 *aca; bool found = false; @@ -441,7 +440,9 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); - else + else { + unsigned int hash = inet6_acaddr_hash(net, addr); + hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash], aca_addr_lst) { nh_dev = fib6_info_nh_dev(aca->aca_rt); @@ -452,6 +453,7 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, break; } } + } rcu_read_unlock(); return found; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 1ede7a16a0be..bde08aa549f3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -772,6 +772,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, case IPV6_2292PKTINFO: { struct net_device *dev = NULL; + int src_idx; if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; @@ -779,12 +780,15 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, } src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); + src_idx = src_info->ipi6_ifindex; - if (src_info->ipi6_ifindex) { + if (src_idx) { if (fl6->flowi6_oif && - src_info->ipi6_ifindex != fl6->flowi6_oif) + src_idx != fl6->flowi6_oif && + (sk->sk_bound_dev_if != fl6->flowi6_oif || + !sk_dev_equal_l3scope(sk, src_idx))) return -EINVAL; - fl6->flowi6_oif = src_info->ipi6_ifindex; + fl6->flowi6_oif = src_idx; } addr_type = __ipv6_addr_type(&src_info->ipi6_addr); diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index 6de3c04b0f30..bd675c61deb1 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -4,6 +4,7 @@ #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/udp.h> +#include <linux/icmpv6.h> #include <linux/types.h> #include <linux/kernel.h> #include <net/fou.h> @@ -69,14 +70,87 @@ static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, return 0; } +static int gue6_err_proto_handler(int proto, struct sk_buff *skb, + struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, u32 info) +{ + const struct inet6_protocol *ipprot; + + ipprot = rcu_dereference(inet6_protos[proto]); + if (ipprot && ipprot->err_handler) { + if (!ipprot->err_handler(skb, opt, type, code, offset, info)) + return 0; + } + + return -ENOENT; +} + +static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + int transport_offset = skb_transport_offset(skb); + struct guehdr *guehdr; + size_t optlen; + int ret; + + if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr)) + return -EINVAL; + + guehdr = (struct guehdr *)&udp_hdr(skb)[1]; + + switch (guehdr->version) { + case 0: /* Full GUE header present */ + break; + case 1: { + /* Direct encasulation of IPv4 or IPv6 */ + skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr)); + + switch (((struct iphdr *)guehdr)->version) { + case 4: + ret = gue6_err_proto_handler(IPPROTO_IPIP, skb, opt, + type, code, offset, info); + goto out; + case 6: + ret = gue6_err_proto_handler(IPPROTO_IPV6, skb, opt, + type, code, offset, info); + goto out; + default: + ret = -EOPNOTSUPP; + goto out; + } + } + default: /* Undefined version */ + return -EOPNOTSUPP; + } + + if (guehdr->control) + return -ENOENT; + + optlen = guehdr->hlen << 2; + + if (validate_gue_flags(guehdr, optlen)) + return -EINVAL; + + skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr)); + ret = gue6_err_proto_handler(guehdr->proto_ctype, skb, + opt, type, code, offset, info); + +out: + skb_set_transport_header(skb, transport_offset); + return ret; +} + + static const struct ip6_tnl_encap_ops fou_ip6tun_ops = { .encap_hlen = fou_encap_hlen, .build_header = fou6_build_header, + .err_handler = gue6_err, }; static const struct ip6_tnl_encap_ops gue_ip6tun_ops = { .encap_hlen = gue_encap_hlen, .build_header = gue6_build_header, + .err_handler = gue6_err, }; static int ip6_tnl_encap_add_fou_ops(void) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index c9c53ade55c3..5d7aa2c2770c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -84,7 +84,7 @@ static inline struct sock *icmpv6_sk(struct net *net) return net->ipv6.icmp_sk[smp_processor_id()]; } -static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ @@ -100,6 +100,8 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) ping_err(skb, offset, ntohl(info)); + + return 0; } static int icmpv6_rcv(struct sk_buff *skb); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 3d7c7460a0c5..5eeeba7181a1 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -99,6 +99,7 @@ static inline int compute_score(struct sock *sk, struct net *net, const int dif, const int sdif, bool exact_dif) { int score = -1; + bool dev_match; if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { @@ -109,15 +110,12 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } - if (sk->sk_bound_dev_if || exact_dif) { - bool dev_match = (sk->sk_bound_dev_if == dif || - sk->sk_bound_dev_if == sdif); + dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, + dif, sdif); + if (!dev_match) + return -1; + score++; - if (!dev_match) - return -1; - if (sk->sk_bound_dev_if) - score++; - } if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 515adbdba1d2..81b69bcee714 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -423,7 +423,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) } -static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); @@ -433,13 +433,13 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6), offset) < 0) - return; + return -EINVAL; ipv6h = (const struct ipv6hdr *)skb->data; t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, tpi.key, tpi.proto); if (!t) - return; + return -ENOENT; switch (type) { struct ipv6_tlv_tnl_enc_lim *tel; @@ -449,14 +449,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, t->parms.name); if (code != ICMPV6_PORT_UNREACH) break; - return; + return 0; case ICMPV6_TIME_EXCEED: if (code == ICMPV6_EXC_HOPLIMIT) { net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", t->parms.name); break; } - return; + return 0; case ICMPV6_PARAMPROB: teli = 0; if (code == ICMPV6_HDR_FIELD) @@ -472,14 +472,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", t->parms.name); } - return; + return 0; case ICMPV6_PKT_TOOBIG: ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); - return; + return 0; case NDISC_REDIRECT: ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); - return; + return 0; } if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) @@ -487,6 +487,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, else t->err_count = 1; t->err_time = jiffies; + + return 0; } static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c1d85830c906..c7ed2b6d5a1d 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -319,28 +319,26 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, /* * Deliver the packet to the host */ - - -static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, + bool have_final) { const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; - int nexthdr; bool raw; - bool have_final = false; /* * Parse extension headers */ - rcu_read_lock(); resubmit: idev = ip6_dst_idev(skb_dst(skb)); - if (!pskb_pull(skb, skb_transport_offset(skb))) - goto discard; nhoff = IP6CB(skb)->nhoff; - nexthdr = skb_network_header(skb)[nhoff]; + if (!have_final) { + if (!pskb_pull(skb, skb_transport_offset(skb))) + goto discard; + nexthdr = skb_network_header(skb)[nhoff]; + } resubmit_final: raw = raw6_local_deliver(skb, nexthdr); @@ -359,6 +357,8 @@ resubmit_final: } } else if (ipprot->flags & INET6_PROTO_FINAL) { const struct ipv6hdr *hdr; + int sdif = inet6_sdif(skb); + struct net_device *dev; /* Only do this once for first final protocol */ have_final = true; @@ -371,9 +371,19 @@ resubmit_final: skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); hdr = ipv6_hdr(skb); + + /* skb->dev passed may be master dev for vrfs. */ + if (sdif) { + dev = dev_get_by_index_rcu(net, sdif); + if (!dev) + goto discard; + } else { + dev = skb->dev; + } + if (ipv6_addr_is_multicast(&hdr->daddr) && - !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, - &hdr->saddr) && + !ipv6_chk_mcast_addr(dev, &hdr->daddr, + &hdr->saddr) && !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) goto discard; } @@ -411,13 +421,19 @@ resubmit_final: consume_skb(skb); } } - rcu_read_unlock(); - return 0; + return; discard: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - rcu_read_unlock(); kfree_skb(skb); +} + +static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + rcu_read_lock(); + ip6_protocol_deliver_rcu(net, skb, 0, false); + rcu_read_unlock(); + return 0; } @@ -432,15 +448,32 @@ EXPORT_SYMBOL_GPL(ip6_input); int ip6_mc_input(struct sk_buff *skb) { + int sdif = inet6_sdif(skb); const struct ipv6hdr *hdr; + struct net_device *dev; bool deliver; __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev), __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST, skb->len); + /* skb->dev passed may be master dev for vrfs. */ + if (sdif) { + rcu_read_lock(); + dev = dev_get_by_index_rcu(dev_net(skb->dev), sdif); + if (!dev) { + rcu_read_unlock(); + kfree_skb(skb); + return -ENODEV; + } + } else { + dev = skb->dev; + } + hdr = ipv6_hdr(skb); - deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL); + deliver = ipv6_chk_mcast_addr(dev, &hdr->daddr, NULL); + if (sdif) + rcu_read_unlock(); #ifdef CONFIG_IPV6_MROUTE /* diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index c7e495f12011..70f525c33cb6 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -229,14 +229,21 @@ static struct sk_buff *ipv6_gro_receive(struct list_head *head, * XXX skbs on the gro_list have all been parsed and pulled * already so we don't need to compare nlen * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops))) - * memcmp() alone below is suffcient, right? + * memcmp() alone below is sufficient, right? */ if ((first_word & htonl(0xF00FFFFF)) || - memcmp(&iph->nexthdr, &iph2->nexthdr, - nlen - offsetof(struct ipv6hdr, nexthdr))) { + !ipv6_addr_equal(&iph->saddr, &iph2->saddr) || + !ipv6_addr_equal(&iph->daddr, &iph2->daddr) || + *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) { +not_same_flow: NAPI_GRO_CB(p)->same_flow = 0; continue; } + if (unlikely(nlen > sizeof(struct ipv6hdr))) { + if (memcmp(iph + 1, iph2 + 1, + nlen - sizeof(struct ipv6hdr))) + goto not_same_flow; + } /* flush if Traffic Class fields are different */ NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); NAPI_GRO_CB(p)->flush |= flush; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fcd3c66ded16..9d55ee33b7f9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -378,6 +378,13 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); +#ifdef CONFIG_NET_SWITCHDEV + if (skb->offload_l3_fwd_mark) { + consume_skb(skb); + return 0; + } +#endif + return dst_output(net, sk, skb); } @@ -1245,6 +1252,7 @@ static int __ip6_append_data(struct sock *sk, { struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; + struct ubuf_info *uarg = NULL; int exthdrlen = 0; int dst_exthdrlen = 0; int hh_len; @@ -1257,7 +1265,7 @@ static int __ip6_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; unsigned int wmem_alloc_delta = 0; - bool paged; + bool paged, extra_uref; skb = skb_peek_tail(queue); if (!skb) { @@ -1322,6 +1330,20 @@ emsgsize: rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; + if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { + uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); + if (!uarg) + return -ENOBUFS; + extra_uref = true; + if (rt->dst.dev->features & NETIF_F_SG && + csummode == CHECKSUM_PARTIAL) { + paged = true; + } else { + uarg->zerocopy = 0; + skb_zcopy_set(skb, uarg, &extra_uref); + } + } + /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. @@ -1440,12 +1462,6 @@ alloc_new_skb: skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + dst_exthdrlen); - /* Only the initial fragment is time stamped */ - skb_shinfo(skb)->tx_flags = cork->tx_flags; - cork->tx_flags = 0; - skb_shinfo(skb)->tskey = tskey; - tskey = 0; - /* * Find where to start putting bytes */ @@ -1477,6 +1493,13 @@ alloc_new_skb: exthdrlen = 0; dst_exthdrlen = 0; + /* Only the initial fragment is time stamped */ + skb_shinfo(skb)->tx_flags = cork->tx_flags; + cork->tx_flags = 0; + skb_shinfo(skb)->tskey = tskey; + tskey = 0; + skb_zcopy_set(skb, uarg, &extra_uref); + if ((flags & MSG_CONFIRM) && !skb_prev) skb_set_dst_pending_confirm(skb, 1); @@ -1506,7 +1529,7 @@ alloc_new_skb: err = -EFAULT; goto error; } - } else { + } else if (!uarg || !uarg->zerocopy) { int i = skb_shinfo(skb)->nr_frags; err = -ENOMEM; @@ -1536,6 +1559,10 @@ alloc_new_skb: skb->data_len += copy; skb->truesize += copy; wmem_alloc_delta += copy; + } else { + err = skb_zerocopy_iter_dgram(skb, from, copy); + if (err < 0) + goto error; } offset += copy; length -= copy; @@ -1548,6 +1575,8 @@ alloc_new_skb: error_efault: err = -EFAULT; error: + if (uarg) + sock_zerocopy_put_abort(uarg, extra_uref); cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index b283f293ee4a..3965d5396b0a 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -31,6 +31,22 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, if (err < 0) goto error; } + if (cfg->bind_ifindex) { + struct net_device *dev; + + dev = dev_get_by_index(net, cfg->bind_ifindex); + if (!dev) { + err = -ENODEV; + goto error; + } + + err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, + dev->name, strlen(dev->name) + 1); + dev_put(dev); + + if (err < 0) + goto error; + } udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e2ea691e42c6..8c63494400c4 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -655,7 +655,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) return NULL; } - if (dev_open(dev)) + if (dev_open(dev, NULL)) goto failure; dev_hold(dev); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 381ce38940ae..973e215c3114 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -486,7 +486,7 @@ sticky_done: retv = -EFAULT; break; } - if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if) + if (!sk_dev_equal_l3scope(sk, pkt.ipi6_ifindex)) goto e_inval; np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 5e0efd3954e9..aed7eb5c2123 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -86,9 +86,8 @@ struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; - if (sk->sk_bound_dev_if && - sk->sk_bound_dev_if != dif && - sk->sk_bound_dev_if != sdif) + if (!raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, + dif, sdif)) continue; if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 059f0531f7c1..194bc162866d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2977,7 +2977,8 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (!rt) goto out; - rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len); + rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len, + extack); if (IS_ERR(rt->fib6_metrics)) { err = PTR_ERR(rt->fib6_metrics); /* Do not leave garbage there. */ @@ -3710,7 +3711,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, if (!f6i) return ERR_PTR(-ENOMEM); - f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0); + f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0, NULL); f6i->dst_nocount = true; f6i->dst_host = true; f6i->fib6_protocol = RTPROT_KERNEL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 03e6b7a2bc53..a3f559162521 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -349,7 +349,7 @@ static void tcp_v6_mtu_reduced(struct sock *sk) } } -static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; @@ -371,17 +371,19 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); - return; + return -ENOENT; } if (sk->sk_state == TCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); - return; + return 0; } seq = ntohl(th->seq); fatal = icmpv6_err_convert(type, code, &err); - if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq, fatal); + if (sk->sk_state == TCP_NEW_SYN_RECV) { + tcp_req_err(sk, seq, fatal); + return 0; + } bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) @@ -467,6 +469,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, out: bh_unlock_sock(sk); sock_put(sk); + return 0; } diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index dae25cad05cd..1991dede7367 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -134,24 +134,28 @@ drop: return 0; } -static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_tunnel *handler; for_each_tunnel_rcu(tunnel6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) - break; + return 0; + + return -ENOENT; } -static void tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_tunnel *handler; for_each_tunnel_rcu(tunnel46_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) - break; + return 0; + + return -ENOENT; } static const struct inet6_protocol tunnel6_protocol = { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d2d97d07ef27..09cba4cfe31f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -45,6 +45,7 @@ #include <net/raw.h> #include <net/tcp_states.h> #include <net/ip6_checksum.h> +#include <net/ip6_tunnel.h> #include <net/xfrm.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> @@ -117,6 +118,7 @@ static int compute_score(struct sock *sk, struct net *net, { int score; struct inet_sock *inet; + bool dev_match; if (!net_eq(sock_net(sk), net) || udp_sk(sk)->udp_port_hash != hnum || @@ -144,15 +146,10 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - if (sk->sk_bound_dev_if || exact_dif) { - bool dev_match = (sk->sk_bound_dev_if == dif || - sk->sk_bound_dev_if == sdif); - - if (!dev_match) - return -1; - if (sk->sk_bound_dev_if) - score++; - } + dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif); + if (!dev_match) + return -1; + score++; if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; @@ -329,6 +326,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int err; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; + struct udp_mib *mib; int is_udp4; if (flags & MSG_ERRQUEUE) @@ -352,6 +350,7 @@ try_again: msg->msg_flags |= MSG_TRUNC; is_udp4 = (skb->protocol == htons(ETH_P_IP)); + mib = __UDPX_MIB(sk, is_udp4); /* * If checksum is needed at all, try to do it while copying the @@ -380,24 +379,13 @@ try_again: if (unlikely(err)) { if (!peeked) { atomic_inc(&sk->sk_drops); - if (is_udp4) - UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, - is_udplite); - else - UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, - is_udplite); + SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } kfree_skb(skb); return err; } - if (!peeked) { - if (is_udp4) - UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, - is_udplite); - else - UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, - is_udplite); - } + if (!peeked) + SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS); sock_recv_ts_and_drops(msg, sk, skb); @@ -421,6 +409,9 @@ try_again: *addr_len = sizeof(*sin6); } + if (udp_sk(sk)->gro_enabled) + udp_cmsg_recv(msg, sk, skb); + if (np->rxopt.all) ip6_datagram_recv_common_ctl(sk, msg, skb); @@ -443,17 +434,8 @@ try_again: csum_copy_err: if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, udp_skb_destructor)) { - if (is_udp4) { - UDP_INC_STATS(sock_net(sk), - UDP_MIB_CSUMERRORS, is_udplite); - UDP_INC_STATS(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); - } else { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_CSUMERRORS, is_udplite); - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); - } + SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS); + SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } kfree_skb(skb); @@ -463,15 +445,106 @@ csum_copy_err: goto try_again; } -void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info, - struct udp_table *udptable) +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +void udpv6_encap_enable(void) +{ + static_branch_inc(&udpv6_encap_needed_key); +} +EXPORT_SYMBOL(udpv6_encap_enable); + +/* Handler for tunnels with arbitrary destination ports: no socket lookup, go + * through error handlers in encapsulations looking for a match. + */ +static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb, + struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, u32 info) +{ + int i; + + for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { + int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, u32 info); + + if (!ip6tun_encaps[i]) + continue; + handler = rcu_dereference(ip6tun_encaps[i]->err_handler); + if (handler && !handler(skb, opt, type, code, offset, info)) + return 0; + } + + return -ENOENT; +} + +/* Try to match ICMP errors to UDP tunnels by looking up a socket without + * reversing source and destination port: this will match tunnels that force the + * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that + * lwtunnels might actually break this assumption by being configured with + * different destination ports on endpoints, in this case we won't be able to + * trace ICMP messages back to them. + * + * If this doesn't match any socket, probe tunnels with arbitrary destination + * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port + * we've sent packets to won't necessarily match the local destination port. + * + * Then ask the tunnel implementation to match the error against a valid + * association. + * + * Return an error if we can't find a match, the socket if we need further + * processing, zero otherwise. + */ +static struct sock *__udp6_lib_err_encap(struct net *net, + const struct ipv6hdr *hdr, int offset, + struct udphdr *uh, + struct udp_table *udptable, + struct sk_buff *skb, + struct inet6_skb_parm *opt, + u8 type, u8 code, __be32 info) +{ + int network_offset, transport_offset; + struct sock *sk; + + network_offset = skb_network_offset(skb); + transport_offset = skb_transport_offset(skb); + + /* Network header needs to point to the outer IPv6 header inside ICMP */ + skb_reset_network_header(skb); + + /* Transport header needs to point to the UDP header */ + skb_set_transport_header(skb, offset); + + sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, + &hdr->saddr, uh->dest, + inet6_iif(skb), 0, udptable, skb); + if (sk) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); + struct udp_sock *up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (!lookup || lookup(sk, skb)) + sk = NULL; + } + + if (!sk) { + sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, + offset, info)); + } + + skb_set_transport_header(skb, transport_offset); + skb_set_network_header(skb, network_offset); + + return sk; +} + +int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info, + struct udp_table *udptable) { struct ipv6_pinfo *np; const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct in6_addr *saddr = &hdr->saddr; const struct in6_addr *daddr = &hdr->daddr; struct udphdr *uh = (struct udphdr *)(skb->data+offset); + bool tunnel = false; struct sock *sk; int harderr; int err; @@ -480,9 +553,23 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, skb); if (!sk) { - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); - return; + /* No socket for error: try tunnels before discarding */ + sk = ERR_PTR(-ENOENT); + if (static_branch_unlikely(&udpv6_encap_needed_key)) { + sk = __udp6_lib_err_encap(net, hdr, offset, uh, + udptable, skb, + opt, type, code, info); + if (!sk) + return 0; + } + + if (IS_ERR(sk)) { + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); + return PTR_ERR(sk); + } + + tunnel = true; } harderr = icmpv6_err_convert(type, code, &err); @@ -496,10 +583,19 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, harderr = 1; } if (type == NDISC_REDIRECT) { - ip6_sk_redirect(skb, sk); + if (tunnel) { + ip6_redirect(skb, sock_net(sk), inet6_iif(skb), + sk->sk_mark, sk->sk_uid); + } else { + ip6_sk_redirect(skb, sk); + } goto out; } + /* Tunnels don't have an application socket: don't pass errors back */ + if (tunnel) + goto out; + if (!np->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; @@ -510,7 +606,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk->sk_err = err; sk->sk_error_report(sk); out: - return; + return 0; } static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -541,21 +637,14 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -static __inline__ void udpv6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, u8 type, - u8 code, int offset, __be32 info) +static __inline__ int udpv6_err(struct sk_buff *skb, + struct inet6_skb_parm *opt, u8 type, + u8 code, int offset, __be32 info) { - __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); + return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); -void udpv6_encap_enable(void) -{ - static_branch_enable(&udpv6_encap_needed_key); -} -EXPORT_SYMBOL(udpv6_encap_enable); - -static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); @@ -638,10 +727,32 @@ drop: return -1; } +static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *next, *segs; + int ret; + + if (likely(!udp_unexpected_gso(sk, skb))) + return udpv6_queue_rcv_one_skb(sk, skb); + + __skb_push(skb, -skb_mac_offset(skb)); + segs = udp_rcv_segment(sk, skb, false); + for (skb = segs; skb; skb = next) { + next = skb->next; + __skb_pull(skb, skb_transport_offset(skb)); + + ret = udpv6_queue_rcv_one_skb(sk, skb); + if (ret > 0) + ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret, + true); + } + return 0; +} + static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, - int dif, unsigned short hnum) + int dif, int sdif, unsigned short hnum) { struct inet_sock *inet = inet_sk(sk); @@ -653,7 +764,7 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport && inet->inet_dport != rmt_port) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || - (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) || + !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif) || (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))) return false; @@ -687,6 +798,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, unsigned int offset = offsetof(typeof(*sk), sk_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); int dif = inet6_iif(skb); + int sdif = inet6_sdif(skb); struct hlist_node *node; struct sk_buff *nskb; @@ -701,7 +813,8 @@ start_lookup: sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr, - uh->source, saddr, dif, hnum)) + uh->source, saddr, dif, sdif, + hnum)) continue; /* If zero checksum and no_check is not on for * the socket then skip it. @@ -1458,11 +1571,15 @@ void udpv6_destroy_sock(struct sock *sk) udp_v6_flush_pending_frames(sk); release_sock(sk); - if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { - void (*encap_destroy)(struct sock *sk); - encap_destroy = READ_ONCE(up->encap_destroy); - if (encap_destroy) - encap_destroy(sk); + if (static_branch_unlikely(&udpv6_encap_needed_key)) { + if (up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = READ_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + if (up->encap_enabled) + static_branch_dec(&udpv6_encap_needed_key); } inet6_destroy_sock(sk); diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 7903e21c178b..5730e6503cb4 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -9,8 +9,8 @@ #include <net/transp_v6.h> int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); -void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, - __be32, struct udp_table *); +int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, + __be32, struct udp_table *); int udp_v6_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 1b8e161ac527..828b2457f97b 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) { - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + if (uh->check) uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, &ipv6h->daddr, 0); - } else { - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; - } return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb); } diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 5000ad6878e6..a125aebc29e5 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -20,11 +20,12 @@ static int udplitev6_rcv(struct sk_buff *skb) return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } -static void udplitev6_err(struct sk_buff *skb, +static int udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); + return __udp6_lib_err(skb, opt, type, code, offset, info, + &udplite_table); } static const struct inet6_protocol udplitev6_protocol = { diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index b2dc8ce49378..cc979b702c89 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -80,14 +80,16 @@ static int xfrm6_esp_rcv(struct sk_buff *skb) return 0; } -static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(esp6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) - break; + return 0; + + return -ENOENT; } static int xfrm6_ah_rcv(struct sk_buff *skb) @@ -107,14 +109,16 @@ static int xfrm6_ah_rcv(struct sk_buff *skb) return 0; } -static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(ah6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) - break; + return 0; + + return -ENOENT; } static int xfrm6_ipcomp_rcv(struct sk_buff *skb) @@ -134,14 +138,16 @@ static int xfrm6_ipcomp_rcv(struct sk_buff *skb) return 0; } -static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(ipcomp6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) - break; + return 0; + + return -ENOENT; } static const struct inet6_protocol esp6_protocol = { |