diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrconf.c | 8 | ||||
-rw-r--r-- | net/ipv6/ah6.c | 2 | ||||
-rw-r--r-- | net/ipv6/esp6.c | 4 | ||||
-rw-r--r-- | net/ipv6/esp6_offload.c | 1 | ||||
-rw-r--r-- | net/ipv6/exthdrs.c | 31 | ||||
-rw-r--r-- | net/ipv6/fib6_rules.c | 2 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 21 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 9 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 40 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 5 | ||||
-rw-r--r-- | net/ipv6/ipcomp6.c | 2 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 25 | ||||
-rw-r--r-- | net/ipv6/mip6.c | 99 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 2 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_reject_ipv6.c | 2 | ||||
-rw-r--r-- | net/ipv6/output_core.c | 28 | ||||
-rw-r--r-- | net/ipv6/raw.c | 2 | ||||
-rw-r--r-- | net/ipv6/route.c | 131 | ||||
-rw-r--r-- | net/ipv6/seg6_local.c | 94 | ||||
-rw-r--r-- | net/ipv6/sit.c | 6 | ||||
-rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 31 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 20 | ||||
-rw-r--r-- | net/ipv6/udp.c | 2 | ||||
-rw-r--r-- | net/ipv6/xfrm6_output.c | 7 | ||||
-rw-r--r-- | net/ipv6/xfrm6_tunnel.c | 1 |
25 files changed, 343 insertions, 232 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 701eb82acd1c..3bf685fe64b9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6903,10 +6903,10 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, { - .procname = "addr_gen_mode", - .data = &ipv6_devconf.addr_gen_mode, - .maxlen = sizeof(int), - .mode = 0644, + .procname = "addr_gen_mode", + .data = &ipv6_devconf.addr_gen_mode, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = addrconf_sysctl_addr_gen_mode, }, { diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 20d492da725a..828e62514260 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -755,7 +755,6 @@ static int ah6_rcv_cb(struct sk_buff *skb, int err) } static const struct xfrm_type ah6_type = { - .description = "AH6", .owner = THIS_MODULE, .proto = IPPROTO_AH, .flags = XFRM_TYPE_REPLAY_PROT, @@ -763,7 +762,6 @@ static const struct xfrm_type ah6_type = { .destructor = ah6_destroy, .input = ah6_input, .output = ah6_output, - .hdr_offset = xfrm6_find_1stfragopt, }; static struct xfrm6_protocol ah6_protocol = { diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 393ae2b78e7d..ed2f061b8768 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -708,7 +708,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } @@ -1243,7 +1243,6 @@ static int esp6_rcv_cb(struct sk_buff *skb, int err) } static const struct xfrm_type esp6_type = { - .description = "ESP6", .owner = THIS_MODULE, .proto = IPPROTO_ESP, .flags = XFRM_TYPE_REPLAY_PROT, @@ -1251,7 +1250,6 @@ static const struct xfrm_type esp6_type = { .destructor = esp6_destroy, .input = esp6_input, .output = esp6_output, - .hdr_offset = xfrm6_find_1stfragopt, }; static struct xfrm6_protocol esp6_protocol = { diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 40ed4fcf1cf4..a349d4798077 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -377,7 +377,6 @@ static const struct net_offload esp6_offload = { }; static const struct xfrm_type_offload esp6_type_offload = { - .description = "ESP6 OFFLOAD", .owner = THIS_MODULE, .proto = IPPROTO_ESP, .input_tail = esp6_input_tail, diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 56e479d158b7..26882e165c9e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -135,18 +135,23 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, len -= 2; while (len > 0) { - int optlen = nh[off + 1] + 2; - int i; + int optlen, i; - switch (nh[off]) { - case IPV6_TLV_PAD1: - optlen = 1; + if (nh[off] == IPV6_TLV_PAD1) { padlen++; if (padlen > 7) goto bad; - break; + off++; + len--; + continue; + } + if (len < 2) + goto bad; + optlen = nh[off + 1] + 2; + if (optlen > len) + goto bad; - case IPV6_TLV_PADN: + if (nh[off] == IPV6_TLV_PADN) { /* RFC 2460 states that the purpose of PadN is * to align the containing header to multiples * of 8. 7 is therefore the highest valid value. @@ -163,12 +168,7 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, if (nh[off + i] != 0) goto bad; } - break; - - default: /* Other TLV code so scan list */ - if (optlen > len) - goto bad; - + } else { tlv_count++; if (tlv_count > max_count) goto bad; @@ -188,7 +188,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, return false; padlen = 0; - break; } off += optlen; len -= optlen; @@ -306,7 +305,7 @@ fail_and_free: #endif if (ip6_parse_tlv(tlvprocdestopt_lst, skb, - init_net.ipv6.sysctl.max_dst_opts_cnt)) { + net->ipv6.sysctl.max_dst_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) @@ -1037,7 +1036,7 @@ fail_and_free: opt->flags |= IP6SKB_HOPBYHOP; if (ip6_parse_tlv(tlvprochopopt_lst, skb, - init_net.ipv6.sysctl.max_hbh_opts_cnt)) { + net->ipv6.sysctl.max_hbh_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8f9a83314de7..40f3e4f9f33a 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -467,7 +467,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { static int __net_init fib6_rules_net_init(struct net *net) { struct fib_rules_ops *ops; - int err = -ENOMEM; + int err; ops = fib_rules_register(&fib6_rules_ops_template, net); if (IS_ERR(ops)) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e8398ffb5e35..a7c31ab67c5d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -725,6 +725,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct ipcm6_cookie ipc6; u32 mark = IP6_REPLY_MARK(net, skb->mark); bool acast; + u8 type; if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) && net->ipv6.sysctl.icmpv6_echo_ignore_multicast) @@ -740,8 +741,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) !(net->ipv6.sysctl.anycast_src_echo_reply && acast)) saddr = NULL; + if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST) + type = ICMPV6_EXT_ECHO_REPLY; + else + type = ICMPV6_ECHO_REPLY; + memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); - tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; + tmp_hdr.icmp6_type = type; memset(&fl6, 0, sizeof(fl6)); if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES) @@ -752,7 +758,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (saddr) fl6.saddr = *saddr; fl6.flowi6_oif = icmp6_iif(skb); - fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.fl6_icmp_type = type; fl6.flowi6_mark = mark; fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); @@ -783,13 +789,17 @@ static void icmpv6_echo_reply(struct sk_buff *skb) msg.skb = skb; msg.offset = 0; - msg.type = ICMPV6_ECHO_REPLY; + msg.type = type; ipcm6_init_sk(&ipc6, np); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ipc6.sockc.mark = mark; + if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST) + if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr)) + goto out_dst_release; + if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, @@ -911,6 +921,11 @@ static int icmpv6_rcv(struct sk_buff *skb) if (!net->ipv6.sysctl.icmpv6_echo_ignore_all) icmpv6_echo_reply(skb); break; + case ICMPV6_EXT_ECHO_REQUEST: + if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && + net->ipv4.sysctl_icmp_echo_enable_probe) + icmpv6_echo_reply(skb); + break; case ICMPV6_ECHO_REPLY: success = ping_rcv(skb); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 679699e953f1..2d650dc24349 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -32,6 +32,7 @@ #include <net/lwtunnel.h> #include <net/fib_notifier.h> +#include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> @@ -2355,6 +2356,10 @@ static int __net_init fib6_net_init(struct net *net) if (err) return err; + /* Default to 3-tuple */ + net->ipv6.sysctl.multipath_hash_fields = + FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK; + spin_lock_init(&net->ipv6.fib6_gc_lock); rwlock_init(&net->ipv6.fib6_walker_lock); INIT_LIST_HEAD(&net->ipv6.fib6_walkers); @@ -2362,7 +2367,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); if (!net->ipv6.rt6_stats) - goto out_timer; + goto out_notifier; /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); @@ -2407,7 +2412,7 @@ out_fib_table_hash: kfree(net->ipv6.fib_table_hash); out_rt6_stats: kfree(net->ipv6.rt6_stats); -out_timer: +out_notifier: fib6_notifier_exit(net); return -ENOMEM; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ff4f9ebcf7f6..984050f35c61 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1055,13 +1055,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * ip6_route_output will fail given src=any saddr, though, so * that's why we try it again later. */ - if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) { + if (ipv6_addr_any(&fl6->saddr)) { struct fib6_info *from; struct rt6_info *rt; - bool had_dst = *dst != NULL; - if (!had_dst) - *dst = ip6_route_output(net, sk, fl6); + *dst = ip6_route_output(net, sk, fl6); rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; rcu_read_lock(); @@ -1078,7 +1076,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * never existed and let the SA-enabled version take * over. */ - if (!had_dst && (*dst)->error) { + if ((*dst)->error) { dst_release(*dst); *dst = NULL; } @@ -1555,7 +1553,7 @@ emsgsize: unsigned int datalen; unsigned int fraglen; unsigned int fraggap; - unsigned int alloclen; + unsigned int alloclen, alloc_extra; unsigned int pagedlen; alloc_new_skb: /* There's no room in the current skb */ @@ -1582,17 +1580,28 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; pagedlen = 0; + alloc_extra = hh_len; + alloc_extra += dst_exthdrlen; + alloc_extra += rt->dst.trailer_len; + + /* We just reserve space for fragment header. + * Note: this may be overallocation if the message + * (without MSG_MORE) fits into the MTU. + */ + alloc_extra += sizeof(struct frag_hdr); + if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else if (!paged) + else if (!paged && + (fraglen + alloc_extra < SKB_MAX_ALLOC || + !(rt->dst.dev->features & NETIF_F_SG))) alloclen = fraglen; else { alloclen = min_t(int, fraglen, MAX_HEADER); pagedlen = fraglen - alloclen; } - - alloclen += dst_exthdrlen; + alloclen += alloc_extra; if (datalen != length + fraggap) { /* @@ -1602,30 +1611,21 @@ alloc_new_skb: datalen += rt->dst.trailer_len; } - alloclen += rt->dst.trailer_len; fraglen = datalen + fragheaderlen; - /* - * We just reserve space for fragment header. - * Note: this may be overallocation if the message - * (without MSG_MORE) fits into the MTU. - */ - alloclen += sizeof(struct frag_hdr); - copy = datalen - transhdrlen - fraggap - pagedlen; if (copy < 0) { err = -EINVAL; goto error; } if (transhdrlen) { - skb = sock_alloc_send_skb(sk, - alloclen + hh_len, + skb = sock_alloc_send_skb(sk, alloclen, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 2 * sk->sk_sndbuf) - skb = alloc_skb(alloclen + hh_len, + skb = alloc_skb(alloclen, sk->sk_allocation); if (unlikely(!skb)) err = -ENOBUFS; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 288bafded998..322698d9fcf4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -837,6 +837,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { skb->dev = tunnel->dev; + skb_reset_mac_header(skb); } skb_reset_network_header(skb); @@ -1239,8 +1240,6 @@ route_lookup: if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; - skb_set_inner_ipproto(skb, proto); - err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) return err; @@ -1377,6 +1376,8 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; + skb_set_inner_ipproto(skb, protocol); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, protocol); if (err != 0) { diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index daef890460b7..15f984be3570 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -172,14 +172,12 @@ static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) } static const struct xfrm_type ipcomp6_type = { - .description = "IPCOMP6", .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp6_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output, - .hdr_offset = xfrm6_find_1stfragopt, }; static struct xfrm6_protocol ipcomp6_protocol = { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d36ef9d25e73..54ec163fbafa 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1729,22 +1729,25 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) { + u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, + 2, 0, 0, IPV6_TLV_PADN, 0 }; struct net_device *dev = idev->dev; - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.igmp_sk; - struct sk_buff *skb; - struct mld2_report *pmr; - struct in6_addr addr_buf; - const struct in6_addr *saddr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - unsigned int size = mtu + hlen + tlen; + struct net *net = dev_net(dev); + const struct in6_addr *saddr; + struct in6_addr addr_buf; + struct mld2_report *pmr; + struct sk_buff *skb; + unsigned int size; + struct sock *sk; int err; - u8 ra[8] = { IPPROTO_ICMPV6, 0, - IPV6_TLV_ROUTERALERT, 2, 0, 0, - IPV6_TLV_PADN, 0 }; - /* we assume size > sizeof(ra) here */ + sk = net->ipv6.igmp_sk; + /* we assume size > sizeof(ra) here + * Also try to not allocate high-order pages for big MTU + */ + size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen; skb = sock_alloc_send_skb(sk, size, 1, &err); if (!skb) return NULL; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 878fcec14949..aeb35d26e474 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -247,54 +247,6 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, return err; } -static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, - u8 **nexthdr) -{ - u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb_tail_pointer(skb) - - skb_network_header(skb); - int found_rhdr = 0; - - *nexthdr = &ipv6_hdr(skb)->nexthdr; - - while (offset + 1 <= packet_len) { - - switch (**nexthdr) { - case NEXTHDR_HOP: - break; - case NEXTHDR_ROUTING: - found_rhdr = 1; - break; - case NEXTHDR_DEST: - /* - * HAO MUST NOT appear more than once. - * XXX: It is better to try to find by the end of - * XXX: packet if HAO exists. - */ - if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { - net_dbg_ratelimited("mip6: hao exists already, override\n"); - return offset; - } - - if (found_rhdr) - return offset; - - break; - default: - return offset; - } - - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr *)(nh + offset); - } - - return offset; -} - static int mip6_destopt_init_state(struct xfrm_state *x) { if (x->id.spi) { @@ -324,7 +276,6 @@ static void mip6_destopt_destroy(struct xfrm_state *x) } static const struct xfrm_type mip6_destopt_type = { - .description = "MIP6DESTOPT", .owner = THIS_MODULE, .proto = IPPROTO_DSTOPTS, .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR, @@ -333,7 +284,6 @@ static const struct xfrm_type mip6_destopt_type = { .input = mip6_destopt_input, .output = mip6_destopt_output, .reject = mip6_destopt_reject, - .hdr_offset = mip6_destopt_offset, }; static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) @@ -383,53 +333,6 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, - u8 **nexthdr) -{ - u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb_tail_pointer(skb) - - skb_network_header(skb); - int found_rhdr = 0; - - *nexthdr = &ipv6_hdr(skb)->nexthdr; - - while (offset + 1 <= packet_len) { - - switch (**nexthdr) { - case NEXTHDR_HOP: - break; - case NEXTHDR_ROUTING: - if (offset + 3 <= packet_len) { - struct ipv6_rt_hdr *rt; - rt = (struct ipv6_rt_hdr *)(nh + offset); - if (rt->type != 0) - return offset; - } - found_rhdr = 1; - break; - case NEXTHDR_DEST: - if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) - return offset; - - if (found_rhdr) - return offset; - - break; - default: - return offset; - } - - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr *)(nh + offset); - } - - return offset; -} - static int mip6_rthdr_init_state(struct xfrm_state *x) { if (x->id.spi) { @@ -456,7 +359,6 @@ static void mip6_rthdr_destroy(struct xfrm_state *x) } static const struct xfrm_type mip6_rthdr_type = { - .description = "MIP6RT", .owner = THIS_MODULE, .proto = IPPROTO_ROUTING, .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR, @@ -464,7 +366,6 @@ static const struct xfrm_type mip6_rthdr_type = { .destructor = mip6_rthdr_destroy, .input = mip6_rthdr_input, .output = mip6_rthdr_output, - .hdr_offset = mip6_rthdr_offset, }; static int __init mip6_init(void) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e810a23baf99..de2cf3943b91 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -51,7 +51,7 @@ ip6_packet_match(const struct sk_buff *skb, const char *outdev, const struct ip6t_ip6 *ip6info, unsigned int *protoff, - int *fragoff, bool *hotdrop) + u16 *fragoff, bool *hotdrop) { unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 7969d1f3018d..ed69c768797e 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -28,7 +28,7 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(nft_net(pkt), pkt->xt.state->sk, pkt->skb, + nf_send_reset6(nft_net(pkt), nft_sk(pkt), pkt->skb, nft_hook(pkt)); break; default: diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index af36acc1a644..2880dc7d9a49 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -15,29 +15,11 @@ static u32 __ipv6_select_ident(struct net *net, const struct in6_addr *dst, const struct in6_addr *src) { - const struct { - struct in6_addr dst; - struct in6_addr src; - } __aligned(SIPHASH_ALIGNMENT) combined = { - .dst = *dst, - .src = *src, - }; - u32 hash, id; - - /* Note the following code is not safe, but this is okay. */ - if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) - get_random_bytes(&net->ipv4.ip_id_key, - sizeof(net->ipv4.ip_id_key)); - - hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key); - - /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, - * set the hight order instead thus minimizing possible future - * collisions. - */ - id = ip_idents_reserve(hash, 1); - if (unlikely(!id)) - id = 1 << 31; + u32 id; + + do { + id = prandom_u32(); + } while (!id); return id; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index bf3646b57c68..60f1e4f5be5a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -354,7 +354,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, if (np->recverr || harderr) { sk->sk_err = err; - sk->sk_error_report(sk); + sk_error_report(sk); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d417e514bd52..7b756a7dc036 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2326,12 +2326,131 @@ out: } } +static u32 rt6_multipath_custom_hash_outer(const struct net *net, + const struct sk_buff *skb, + bool *p_has_inner) +{ + u32 hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys keys, hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); + return flow_hash_from_keys(&hash_keys); +} + +static u32 rt6_multipath_custom_hash_inner(const struct net *net, + const struct sk_buff *skb, + bool has_inner) +{ + u32 hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys keys, hash_keys; + + /* We assume the packet carries an encapsulation, but if none was + * encountered during dissection of the outer flow, then there is no + * point in calling the flow dissector again. + */ + if (!has_inner) + return 0; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, 0); + + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) + return 0; + + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + } + + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + return flow_hash_from_keys(&hash_keys); +} + +static u32 rt6_multipath_custom_hash_skb(const struct net *net, + const struct sk_buff *skb) +{ + u32 mhash, mhash_inner; + bool has_inner = true; + + mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner); + mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner); + + return jhash_2words(mhash, mhash_inner, 0); +} + +static u32 rt6_multipath_custom_hash_fl6(const struct net *net, + const struct flowi6 *fl6) +{ + u32 hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v6addrs.src = fl6->saddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v6addrs.dst = fl6->daddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = fl6->flowi6_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) + hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = fl6->fl6_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = fl6->fl6_dport; + + return flow_hash_from_keys(&hash_keys); +} + /* if skb is set it will be used and fl6 can be NULL */ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *flkeys) { struct flow_keys hash_keys; - u32 mhash; + u32 mhash = 0; switch (ip6_multipath_hash_policy(net)) { case 0: @@ -2345,6 +2464,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } + mhash = flow_hash_from_keys(&hash_keys); break; case 1: if (skb) { @@ -2376,6 +2496,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.ports.dst = fl6->fl6_dport; hash_keys.basic.ip_proto = fl6->flowi6_proto; } + mhash = flow_hash_from_keys(&hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@ -2412,9 +2533,15 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } + mhash = flow_hash_from_keys(&hash_keys); + break; + case 3: + if (skb) + mhash = rt6_multipath_custom_hash_skb(net, skb); + else + mhash = rt6_multipath_custom_hash_fl6(net, fl6); break; } - mhash = flow_hash_from_keys(&hash_keys); return mhash >> 1; } diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 4ff38cb08f4b..60bf3b877957 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -87,10 +87,10 @@ struct seg6_end_dt_info { int vrf_ifindex; int vrf_table; - /* tunneled packet proto and family (IPv4 or IPv6) */ - __be16 proto; + /* tunneled packet family (IPv4 or IPv6). + * Protocol and header length are inferred from family. + */ u16 family; - int hdrlen; }; struct pcpu_seg6_local_counters { @@ -521,19 +521,6 @@ static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, info->net = net; info->vrf_ifindex = vrf_ifindex; - switch (family) { - case AF_INET: - info->proto = htons(ETH_P_IP); - info->hdrlen = sizeof(struct iphdr); - break; - case AF_INET6: - info->proto = htons(ETH_P_IPV6); - info->hdrlen = sizeof(struct ipv6hdr); - break; - default: - return -EINVAL; - } - info->family = family; info->mode = DT_VRF_MODE; @@ -622,22 +609,44 @@ error: } static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, - struct seg6_local_lwt *slwt) + struct seg6_local_lwt *slwt, u16 family) { struct seg6_end_dt_info *info = &slwt->dt_info; struct net_device *vrf; + __be16 protocol; + int hdrlen; vrf = end_dt_get_vrf_rcu(skb, info); if (unlikely(!vrf)) goto drop; - skb->protocol = info->proto; + switch (family) { + case AF_INET: + protocol = htons(ETH_P_IP); + hdrlen = sizeof(struct iphdr); + break; + case AF_INET6: + protocol = htons(ETH_P_IPV6); + hdrlen = sizeof(struct ipv6hdr); + break; + case AF_UNSPEC: + fallthrough; + default: + goto drop; + } + + if (unlikely(info->family != AF_UNSPEC && info->family != family)) { + pr_warn_once("seg6local: SRv6 End.DT* family mismatch"); + goto drop; + } + + skb->protocol = protocol; skb_dst_drop(skb); - skb_set_transport_header(skb, info->hdrlen); + skb_set_transport_header(skb, hdrlen); - return end_dt_vrf_rcv(skb, info->family, vrf); + return end_dt_vrf_rcv(skb, family, vrf); drop: kfree_skb(skb); @@ -656,7 +665,7 @@ static int input_action_end_dt4(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; - skb = end_dt_vrf_core(skb, slwt); + skb = end_dt_vrf_core(skb, slwt, AF_INET); if (!skb) /* packet has been processed and consumed by the VRF */ return 0; @@ -739,7 +748,7 @@ static int input_action_end_dt6(struct sk_buff *skb, goto legacy_mode; /* DT6_VRF_MODE */ - skb = end_dt_vrf_core(skb, slwt); + skb = end_dt_vrf_core(skb, slwt, AF_INET6); if (!skb) /* packet has been processed and consumed by the VRF */ return 0; @@ -767,6 +776,36 @@ drop: return -EINVAL; } +#ifdef CONFIG_NET_L3_MASTER_DEV +static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack); +} + +static int input_action_end_dt46(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + unsigned int off = 0; + int nexthdr; + + nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL); + if (unlikely(nexthdr < 0)) + goto drop; + + switch (nexthdr) { + case IPPROTO_IPIP: + return input_action_end_dt4(skb, slwt); + case IPPROTO_IPV6: + return input_action_end_dt6(skb, slwt); + } + +drop: + kfree_skb(skb); + return -EINVAL; +} +#endif + /* push an SRH on top of the current one */ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { @@ -969,6 +1008,17 @@ static struct seg6_action_desc seg6_action_table[] = { .input = input_action_end_dt6, }, { + .action = SEG6_LOCAL_ACTION_END_DT46, + .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), + .optattrs = SEG6_F_LOCAL_COUNTERS, +#ifdef CONFIG_NET_L3_MASTER_DEV + .input = input_action_end_dt46, + .slwt_ops = { + .build_state = seg6_end_dt46_build, + }, +#endif + }, + { .action = SEG6_LOCAL_ACTION_END_B6, .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH), .optattrs = SEG6_F_LOCAL_COUNTERS, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index f7c8110ece5f..df5bea818410 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -710,6 +710,8 @@ static int ipip6_rcv(struct sk_buff *skb) * old iph is no longer valid */ iph = (const struct iphdr *)skb_mac_header(skb); + skb_reset_mac_header(skb); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -780,6 +782,8 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto) tpi = &ipip_tpi; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; + skb_reset_mac_header(skb); + return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); } @@ -973,7 +977,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (df) { mtu = dst_mtu(&rt->dst) - t_hlen; - if (mtu < 68) { + if (mtu < IPV4_MIN_MTU) { dev->stats.collisions++; ip_rt_put(rt); goto tx_error; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 27102c3d6e1d..d7cf26f730d7 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -17,13 +17,17 @@ #include <net/addrconf.h> #include <net/inet_frag.h> #include <net/netevent.h> +#include <net/ip_fib.h> #ifdef CONFIG_NETLABEL #include <net/calipso.h> #endif static int two = 2; +static int three = 3; static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; +static u32 rt6_multipath_hash_fields_all_mask = + FIB_MULTIPATH_HASH_FIELD_ALL_MASK; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -40,6 +44,22 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, return ret; } +static int +proc_rt6_multipath_hash_fields(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + struct net *net; + int ret; + + net = container_of(table->data, struct net, + ipv6.sysctl.multipath_hash_fields); + ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net); + + return ret; +} + static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", @@ -149,7 +169,16 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = &three, + }, + { + .procname = "fib_multipath_hash_fields", + .data = &init_net.ipv6.sysctl.multipath_hash_fields, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_rt6_multipath_hash_fields, + .extra1 = SYSCTL_ONE, + .extra2 = &rt6_multipath_hash_fields_all_mask, }, { .procname = "seg6_flowlabel", diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5f47c0b6e3de..578ab6305c3f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -467,7 +467,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sock_owned_by_user(sk)) { sk->sk_err = err; - sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ + sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ tcp_done(sk); } else @@ -486,7 +486,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; - sk->sk_error_report(sk); + sk_error_report(sk); } else sk->sk_err_soft = err; @@ -1538,6 +1538,7 @@ discard: kfree_skb(skb); return 0; csum_err: + trace_tcp_bad_csum(skb); TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; @@ -1663,10 +1664,18 @@ process: goto csum_error; } if (unlikely(sk->sk_state != TCP_LISTEN)) { - inet_csk_reqsk_queue_drop_and_put(sk, req); - goto lookup; + nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); + if (!nsk) { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } + sk = nsk; + /* reuseport_migrate_sock() has already held one sk_refcnt + * before returning. + */ + } else { + sock_hold(sk); } - sock_hold(sk); refcounted = true; nsk = NULL; if (!tcp_filter(sk, skb)) { @@ -1754,6 +1763,7 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: + trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3fcd86f4dfdc..368972dbd919 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -610,7 +610,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } sk->sk_err = err; - sk->sk_error_report(sk); + sk_error_report(sk); out: return 0; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8b84d534b19d..57fa27c1cdf9 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -16,13 +16,6 @@ #include <net/ip6_route.h> #include <net/xfrm.h> -int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, - u8 **prevhdr) -{ - return ip6_find_1stfragopt(skb, prevhdr); -} -EXPORT_SYMBOL(xfrm6_find_1stfragopt); - void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index f696d46e6910..2b31112c0856 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -291,7 +291,6 @@ static void xfrm6_tunnel_destroy(struct xfrm_state *x) } static const struct xfrm_type xfrm6_tunnel_type = { - .description = "IP6IP6", .owner = THIS_MODULE, .proto = IPPROTO_IPV6, .init_state = xfrm6_tunnel_init_state, |