diff options
Diffstat (limited to 'net/ipv6')
33 files changed, 906 insertions, 139 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 01146b66d666..eff2cacd5209 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1997,6 +1997,7 @@ EXPORT_SYMBOL(ipv6_chk_prefix); * ipv6_dev_find - find the first device with a given source address. * @net: the net namespace * @addr: the source address + * @dev: used to find the L3 domain of interest * * The caller should be protected by RCU, or RTNL. */ @@ -5022,8 +5023,10 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return -EMSGSIZE; if (args->netnsid >= 0 && - nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) + nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || @@ -5054,8 +5057,10 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return -EMSGSIZE; if (args->netnsid >= 0 && - nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) + nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 642fc6ac13d2..8a22486cf270 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -306,7 +306,9 @@ static int ip6addrlbl_del(struct net *net, /* add default label */ static int __net_init ip6addrlbl_net_init(struct net *net) { - int err = 0; + struct ip6addrlbl_entry *p = NULL; + struct hlist_node *n; + int err; int i; ADDRLABEL(KERN_DEBUG "%s\n", __func__); @@ -315,14 +317,20 @@ static int __net_init ip6addrlbl_net_init(struct net *net) INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { - int ret = ip6addrlbl_add(net, - ip6addrlbl_init_table[i].prefix, - ip6addrlbl_init_table[i].prefixlen, - 0, - ip6addrlbl_init_table[i].label, 0); - /* XXX: should we free all rules when we catch an error? */ - if (ret && (!err || err != -ENOMEM)) - err = ret; + err = ip6addrlbl_add(net, + ip6addrlbl_init_table[i].prefix, + ip6addrlbl_init_table[i].prefixlen, + 0, + ip6addrlbl_init_table[i].label, 0); + if (err) + goto err_ip6addrlbl_add; + } + return 0; + +err_ip6addrlbl_add: + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { + hlist_del_rcu(&p->list); + kfree_rcu(p, rcu); } return err; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e648fbebb167..a7e3d170af51 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr); + err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr); if (err) return err; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index d88d97617f7e..440080da805b 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -588,7 +588,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); memset(ah->auth_data, 0, ahp->icv_trunc_len); - if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN)) + err = ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN); + if (err) goto out_free; ip6h->priority = 0; diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 78f766019b7e..51184a70ac7e 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -423,7 +423,7 @@ static void calipso_doi_free_rcu(struct rcu_head *entry) /** * calipso_doi_remove - Remove an existing DOI from the CALIPSO protocol engine * @doi: the DOI value - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Removes a DOI definition from the CALIPSO engine. The NetLabel routines will @@ -1226,7 +1226,7 @@ static int calipso_req_setattr(struct request_sock *req, /** * calipso_req_delattr - Delete the CALIPSO option from a request socket - * @reg: the request socket + * @req: the request socket * * Description: * Removes the CALIPSO option from a request socket, if present. diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 374105e4394f..6126f8bf94b3 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -906,11 +906,6 @@ void ipv6_exthdrs_exit(void) /* * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input(). */ -static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb) -{ - return skb_dst(skb) ? ip6_dst_idev(skb_dst(skb)) : __in6_dev_get(skb->dev); -} - static inline struct net *ipv6_skb_net(struct sk_buff *skb) { return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ec448b71bf9a..8956144ea65e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -158,7 +158,13 @@ static bool is_ineligible(const struct sk_buff *skb) tp = skb_header_pointer(skb, ptr+offsetof(struct icmp6hdr, icmp6_type), sizeof(_type), &_type); - if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) + + /* Based on RFC 8200, Section 4.5 Fragment Header, return + * false if this is a fragment packet with no icmp header info. + */ + if (!tp && frag_off != 0) + return false; + else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) return true; } return false; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 931b186d2e48..c3bc89b6b1a1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1133,8 +1133,13 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, return; if (rt->dst.dev) { - dev->needed_headroom = rt->dst.dev->hard_header_len + - t_hlen; + unsigned short dst_len = rt->dst.dev->hard_header_len + + t_hlen; + + if (t->dev->header_ops) + dev->hard_header_len = dst_len; + else + dev->needed_headroom = dst_len; if (set_mtu) { dev->mtu = rt->dst.dev->mtu - t_hlen; @@ -1159,7 +1164,12 @@ static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + + if (tunnel->dev->header_ops) + tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + else + tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + return t_hlen; } @@ -1391,7 +1401,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_do_ioctl = ip6gre_tunnel_ioctl, .ndo_change_mtu = ip6_tnl_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -1828,7 +1838,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6_tnl_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -1896,7 +1906,7 @@ static const struct net_device_ops ip6erspan_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6_tnl_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a0217e5bf3bc..a7950baa05e5 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -94,36 +94,6 @@ static inline int ip6_tnl_mpls_supported(void) return IS_ENABLED(CONFIG_MPLS); } -static struct net_device_stats *ip6_get_stats(struct net_device *dev) -{ - struct pcpu_sw_netstats tmp, sum = { 0 }; - int i; - - for_each_possible_cpu(i) { - unsigned int start; - const struct pcpu_sw_netstats *tstats = - per_cpu_ptr(dev->tstats, i); - - do { - start = u64_stats_fetch_begin_irq(&tstats->syncp); - tmp.rx_packets = tstats->rx_packets; - tmp.rx_bytes = tstats->rx_bytes; - tmp.tx_packets = tstats->tx_packets; - tmp.tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); - - sum.rx_packets += tmp.rx_packets; - sum.rx_bytes += tmp.rx_bytes; - sum.tx_packets += tmp.tx_packets; - sum.tx_bytes += tmp.tx_bytes; - } - dev->stats.rx_packets = sum.rx_packets; - dev->stats.rx_bytes = sum.rx_bytes; - dev->stats.tx_packets = sum.tx_packets; - dev->stats.tx_bytes = sum.tx_bytes; - return &dev->stats; -} - #define for_each_ip6_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) @@ -204,6 +174,7 @@ ip6_tnl_lookup(struct net *net, int link, /** * ip6_tnl_bucket - get head of list matching given tunnel parameters + * @ip6n: the private data for ip6_vti in the netns * @p: parameters containing tunnel end-points * * Description: @@ -230,6 +201,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) /** * ip6_tnl_link - add tunnel to hash table + * @ip6n: the private data for ip6_vti in the netns * @t: tunnel to be added **/ @@ -246,6 +218,7 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) /** * ip6_tnl_unlink - remove tunnel from hash table + * @ip6n: the private data for ip6_vti in the netns * @t: tunnel to be removed **/ @@ -417,6 +390,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) /** * parse_tvl_tnl_enc_lim - handle encapsulation limit option * @skb: received socket buffer + * @raw: the ICMPv6 error message data * * Return: * 0 if none was found, @@ -485,14 +459,9 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) } EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim); -/** - * ip6_tnl_err - tunnel error handler - * - * Description: - * ip6_tnl_err() should handle errors in the tunnel according - * to the specifications in RFC 2473. - **/ - +/* ip6_tnl_err() should handle errors in the tunnel according to the + * specifications in RFC 2473. + */ static int ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, u8 *type, u8 *code, int *msg, __u32 *info, int offset) @@ -1271,6 +1240,8 @@ route_lookup: if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; + skb_set_inner_ipproto(skb, proto); + err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) return err; @@ -1280,8 +1251,6 @@ route_lookup: ipv6_push_frag_opts(skb, &opt.ops, &proto); } - skb_set_inner_ipproto(skb, proto); - skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1835,7 +1804,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_start_xmit = ip6_tnl_start_xmit, .ndo_do_ioctl = ip6_tnl_ioctl, .ndo_change_mtu = ip6_tnl_change_mtu, - .ndo_get_stats = ip6_get_stats, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 5f9c4fdc120d..0225fd694192 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -125,6 +125,7 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, /** * vti6_tnl_bucket - get head of list matching given tunnel parameters + * @ip6n: the private data for ip6_vti in the netns * @p: parameters containing tunnel end-points * * Description: @@ -889,7 +890,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 43a894bf9a1b..a6804a7e34c1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1148,7 +1148,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; - msg.msg_control = optval; + msg.msg_control_user = optval; msg.msg_controllen = len; msg.msg_flags = flags; msg.msg_control_is_user = true; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 8cd2782a31e4..6c8604390266 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -548,7 +548,7 @@ done: } int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct sockaddr_storage *p) + struct sockaddr_storage __user *p) { int err, i, count, copycount; const struct in6_addr *group; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 27f29b957ee7..76717478f173 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -81,6 +81,7 @@ static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); static int pndisc_constructor(struct pneigh_entry *n); static void pndisc_destructor(struct pneigh_entry *n); static void pndisc_redo(struct sk_buff *skb); +static int ndisc_is_multicast(const void *pkey); static const struct neigh_ops ndisc_generic_ops = { .family = AF_INET6, @@ -115,6 +116,7 @@ struct neigh_table nd_tbl = { .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, .proxy_redo = pndisc_redo, + .is_multicast = ndisc_is_multicast, .allow_add = ndisc_allow_add, .id = "ndisc_cache", .parms = { @@ -1706,6 +1708,11 @@ static void pndisc_redo(struct sk_buff *skb) kfree_skb(skb); } +static int ndisc_is_multicast(const void *pkey) +{ + return ipv6_addr_is_multicast((struct in6_addr *)pkey); +} + static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 6d0e942d082d..ab9a279dd6d4 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -20,10 +20,10 @@ #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include "../bridge/br_private.h" -int ip6_route_me_harder(struct net *net, struct sk_buff *skb) +int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct sock *sk = sk_to_full_sk(skb->sk); + struct sock *sk = sk_to_full_sk(sk_partial); unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & @@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(entry->state.net, skb); + return ip6_route_me_harder(entry->state.net, entry->state.sk, skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e2119bfcf13..c4f532f4d311 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. */ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; @@ -807,7 +807,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -831,7 +831,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; const void *loc_cpu_entry; @@ -980,7 +980,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1035,7 +1035,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - struct xt_table_info *private = t->private; + struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1189,7 +1189,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1552,7 +1552,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 3ac5485049f0..a35019d2e480 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -61,7 +61,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) /* Do nothing */ break; case IP6T_TCP_RESET: - nf_send_reset6(net, skb, xt_hooknum(par)); + nf_send_reset6(net, par->state->sk, skb, xt_hooknum(par)); break; case IP6T_ICMP6_POLICY_FAIL: nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par)); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 1a2748611e00..cee74803d7a1 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -57,7 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 054d287eb13d..c129ad334eb3 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -440,6 +440,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { u16 savethdr = skb->transport_header; + u8 nexthdr = NEXTHDR_FRAGMENT; int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -455,6 +456,14 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) return 0; + /* Discard the first fragment if it does not include all headers + * RFC 8200, Section 4.5 + */ + if (ipv6frag_thdr_truncated(skb, fhoff, &nexthdr)) { + pr_debug("Drop incomplete fragment\n"); + return 0; + } + if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) return -ENOMEM; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 4aef6baaa55e..570d1d76c44d 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -12,6 +12,140 @@ #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> +static bool nf_reject_v6_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto = ip6h->nexthdr; + + if (skb_csum_unnecessary(skb)) + return true; + + if (ip6h->payload_len && + pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) + return false; + + ip6h = ipv6_hdr(skb); + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + if (!nf_reject_verify_csum(proto)) + return true; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; +} + +static int nf_reject_ip6hdr_validate(struct sk_buff *skb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(skb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > skb->len) + return 0; + + return 1; +} + +struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) +{ + struct sk_buff *nskb; + const struct tcphdr *oth; + struct tcphdr _oth; + unsigned int otcplen; + struct ipv6hdr *nip6h; + + if (!nf_reject_ip6hdr_validate(oldskb)) + return NULL; + + oth = nf_reject_ip6_tcphdr_get(oldskb, &_oth, &otcplen, hook); + if (!oth) + return NULL; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return NULL; + + nskb->dev = (struct net_device *)dev; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + net->ipv6.devconf_all->hop_limit); + nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + return nskb; +} +EXPORT_SYMBOL_GPL(nf_reject_skb_v6_tcp_reset); + +struct sk_buff *nf_reject_skb_v6_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) +{ + struct sk_buff *nskb; + struct ipv6hdr *nip6h; + struct icmp6hdr *icmp6h; + unsigned int len; + + if (!nf_reject_ip6hdr_validate(oldskb)) + return NULL; + + /* Include "As much of invoking packet as possible without the ICMPv6 + * packet exceeding the minimum IPv6 MTU" in the ICMP payload. + */ + len = min_t(unsigned int, 1220, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return NULL; + + if (!nf_reject_v6_csum_ok(oldskb, hook)) + return NULL; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return NULL; + + nskb->dev = (struct net_device *)dev; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, + net->ipv6.devconf_all->hop_limit); + + skb_reset_transport_header(nskb); + icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr)); + icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; + icmp6h->icmp6_code = code; + + skb_put_data(nskb, skb_network_header(oldskb), len); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + icmp6h->icmp6_cksum = + csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, + nskb->len - sizeof(struct ipv6hdr), + IPPROTO_ICMPV6, + csum_partial(icmp6h, + nskb->len - sizeof(struct ipv6hdr), + 0)); + + return nskb; +} +EXPORT_SYMBOL_GPL(nf_reject_skb_v6_unreach); + const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *otcph, unsigned int *otcplen, int hook) @@ -141,7 +275,8 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in) return 0; } -void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, + int hook) { struct net_device *br_indev __maybe_unused; struct sk_buff *nskb; @@ -170,7 +305,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.fl6_sport = otcph->dest; fl6.fl6_dport = otcph->source; - if (hook == NF_INET_PRE_ROUTING) { + if (hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) { nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false); if (!dst) return; @@ -233,7 +368,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) dev_queue_xmit(nskb); } else #endif - ip6_local_out(net, nskb->sk, nskb); + ip6_local_out(net, sk, nskb); } EXPORT_SYMBOL_GPL(nf_send_reset6); @@ -268,7 +403,8 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) skb_in->dev = net->loopback_dev; - if (hooknum == NF_INET_PRE_ROUTING && nf_reject6_fill_skb_dst(skb_in)) + if ((hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_INGRESS) && + nf_reject6_fill_skb_dst(skb_in) < 0) return; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index c1098a1968e1..7969d1f3018d 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -28,7 +28,8 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt)); + nf_send_reset6(nft_net(pkt), pkt->xt.state->sk, pkt->skb, + nft_hook(pkt)); break; default: break; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index bbff3e02e302..d6306aa46bb1 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -126,6 +126,7 @@ static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI), + SNMP_MIB_ITEM("Udp6MemErrors", UDP_MIB_MEMERRORS), SNMP_MIB_SENTINEL }; @@ -137,6 +138,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS), + SNMP_MIB_ITEM("UdpLite6MemErrors", UDP_MIB_MEMERRORS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1f5d4d196dcc..47a0dc46cbdb 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -42,6 +42,8 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/tcp.h> +#include <linux/udp.h> #include <net/sock.h> #include <net/snmp.h> @@ -322,6 +324,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); + u8 nexthdr; int iif; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) @@ -351,6 +354,20 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } + /* RFC 8200, Section 4.5 Fragment Header: + * If the first fragment does not include all headers through an + * Upper-Layer header, then that fragment should be discarded and + * an ICMP Parameter Problem, Code 3, message should be sent to + * the source of the fragment, with the Pointer field set to zero. + */ + nexthdr = hdr->nexthdr; + if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) { + __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); + return -1; + } + iif = skb->dev ? skb->dev->ifindex : 0; fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7e0ce7af8234..188e114b29b4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5558,6 +5558,10 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex)) goto nla_put_failure; + + if (dst->lwtstate && + lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) + goto nla_put_failure; } else if (rt->fib6_nsiblings) { struct fib6_info *sibling, *next_sibling; struct nlattr *mp; @@ -6039,11 +6043,6 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt, struct sk_buff *skb; int err = -ENOBUFS; - /* call_fib6_entry_notifiers will be removed when in-kernel notifier - * is implemented and supported for nexthop objects - */ - call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL); - skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (!skb) goto errout; diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c index 307f336b5353..488aec9e1a74 100644 --- a/net/ipv6/rpl.c +++ b/net/ipv6/rpl.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * Authors: * (C) 2020 Alexander Aring <alex.aring@gmail.com> */ diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 5fdf3ebb953f..ff691d9f4a04 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * Authors: * (C) 2020 Alexander Aring <alex.aring@gmail.com> */ @@ -190,18 +190,13 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt) { struct dst_entry *dst = skb_dst(skb); struct rpl_iptunnel_encap *tinfo; - int err = 0; if (skb->protocol != htons(ETH_P_IPV6)) return -EINVAL; tinfo = rpl_encap_lwtunnel(dst->lwtstate); - err = rpl_do_srh_inline(skb, rlwt, tinfo->srh); - if (err) - return err; - - return 0; + return rpl_do_srh_inline(skb, rlwt, tinfo->srh); } static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 85dddfe3a2c6..687d95dce085 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -35,7 +35,6 @@ #include <net/xfrm.h> #include <crypto/hash.h> -#include <crypto/sha.h> #include <net/seg6.h> #include <net/genetlink.h> #include <net/seg6_hmac.h> diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index eba23279912d..b07f7c1c82a4 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -33,11 +33,35 @@ struct seg6_local_lwt; +/* callbacks used for customizing the creation and destruction of a behavior */ +struct seg6_local_lwtunnel_ops { + int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack); + void (*destroy_state)(struct seg6_local_lwt *slwt); +}; + struct seg6_action_desc { int action; unsigned long attrs; + + /* The optattrs field is used for specifying all the optional + * attributes supported by a specific behavior. + * It means that if one of these attributes is not provided in the + * netlink message during the behavior creation, no errors will be + * returned to the userspace. + * + * Each attribute can be only of two types (mutually exclusive): + * 1) required or 2) optional. + * Every user MUST obey to this rule! If you set an attribute as + * required the same attribute CANNOT be set as optional and vice + * versa. + */ + unsigned long optattrs; + int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt); int static_headroom; + + struct seg6_local_lwtunnel_ops slwt_ops; }; struct bpf_lwt_prog { @@ -45,6 +69,28 @@ struct bpf_lwt_prog { char *name; }; +enum seg6_end_dt_mode { + DT_INVALID_MODE = -EINVAL, + DT_LEGACY_MODE = 0, + DT_VRF_MODE = 1, +}; + +struct seg6_end_dt_info { + enum seg6_end_dt_mode mode; + + struct net *net; + /* VRF device associated to the routing table used by the SRv6 + * End.DT4/DT6 behavior for routing IPv4/IPv6 packets. + */ + int vrf_ifindex; + int vrf_table; + + /* tunneled packet proto and family (IPv4 or IPv6) */ + __be16 proto; + u16 family; + int hdrlen; +}; + struct seg6_local_lwt { int action; struct ipv6_sr_hdr *srh; @@ -54,9 +100,16 @@ struct seg6_local_lwt { int iif; int oif; struct bpf_lwt_prog bpf; +#ifdef CONFIG_NET_L3_MASTER_DEV + struct seg6_end_dt_info dt_info; +#endif int headroom; struct seg6_action_desc *desc; + /* unlike the required attrs, we have to track the optional attributes + * that have been effectively parsed. + */ + unsigned long parsed_optattrs; }; static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt) @@ -401,6 +454,248 @@ drop: return -EINVAL; } +#ifdef CONFIG_NET_L3_MASTER_DEV +static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg) +{ + const struct nl_info *nli = &fib6_cfg->fc_nlinfo; + + return nli->nl_net; +} + +static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, + u16 family, struct netlink_ext_ack *extack) +{ + struct seg6_end_dt_info *info = &slwt->dt_info; + int vrf_ifindex; + struct net *net; + + net = fib6_config_get_net(cfg); + + /* note that vrf_table was already set by parse_nla_vrftable() */ + vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net, + info->vrf_table); + if (vrf_ifindex < 0) { + if (vrf_ifindex == -EPERM) { + NL_SET_ERR_MSG(extack, + "Strict mode for VRF is disabled"); + } else if (vrf_ifindex == -ENODEV) { + NL_SET_ERR_MSG(extack, + "Table has no associated VRF device"); + } else { + pr_debug("seg6local: SRv6 End.DT* creation error=%d\n", + vrf_ifindex); + } + + return vrf_ifindex; + } + + info->net = net; + info->vrf_ifindex = vrf_ifindex; + + switch (family) { + case AF_INET: + info->proto = htons(ETH_P_IP); + info->hdrlen = sizeof(struct iphdr); + break; + case AF_INET6: + info->proto = htons(ETH_P_IPV6); + info->hdrlen = sizeof(struct ipv6hdr); + break; + default: + return -EINVAL; + } + + info->family = family; + info->mode = DT_VRF_MODE; + + return 0; +} + +/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and + * routes the IPv4/IPv6 packet by looking at the configured routing table. + * + * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment + * Routing Header packets) from several interfaces and the outer IPv6 + * destination address (DA) is used for retrieving the specific instance of the + * End.DT4/DT6 behavior that should process the packets. + * + * However, the inner IPv4/IPv6 packet is not really bound to any receiving + * interface and thus the End.DT4/DT6 sets the VRF (associated with the + * corresponding routing table) as the *receiving* interface. + * In other words, the End.DT4/DT6 processes a packet as if it has been received + * directly by the VRF (and not by one of its slave devices, if any). + * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in + * according to the routing table configured by the End.DT4/DT6 instance. + * + * This design allows you to get some interesting features like: + * 1) the statistics on rx packets; + * 2) the possibility to install a packet sniffer on the receiving interface + * (the VRF one) for looking at the incoming packets; + * 3) the possibility to leverage the netfilter prerouting hook for the inner + * IPv4 packet. + * + * This function returns: + * - the sk_buff* when the VRF rcv handler has processed the packet correctly; + * - NULL when the skb is consumed by the VRF rcv handler; + * - a pointer which encodes a negative error number in case of error. + * Note that in this case, the function takes care of freeing the skb. + */ +static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family, + struct net_device *dev) +{ + /* based on l3mdev_ip_rcv; we are only interested in the master */ + if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev))) + goto drop; + + if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv)) + goto drop; + + /* the decap packet IPv4/IPv6 does not come with any mac header info. + * We must unset the mac header to allow the VRF device to rebuild it, + * just in case there is a sniffer attached on the device. + */ + skb_unset_mac_header(skb); + + skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family); + if (!skb) + /* the skb buffer was consumed by the handler */ + return NULL; + + /* when a packet is received by a VRF or by one of its slaves, the + * master device reference is set into the skb. + */ + if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex)) + goto drop; + + return skb; + +drop: + kfree_skb(skb); + return ERR_PTR(-EINVAL); +} + +static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb, + struct seg6_end_dt_info *info) +{ + int vrf_ifindex = info->vrf_ifindex; + struct net *net = info->net; + + if (unlikely(vrf_ifindex < 0)) + goto error; + + if (unlikely(!net_eq(dev_net(skb->dev), net))) + goto error; + + return dev_get_by_index_rcu(net, vrf_ifindex); + +error: + return NULL; +} + +static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = &slwt->dt_info; + struct net_device *vrf; + + vrf = end_dt_get_vrf_rcu(skb, info); + if (unlikely(!vrf)) + goto drop; + + skb->protocol = info->proto; + + skb_dst_drop(skb); + + skb_set_transport_header(skb, info->hdrlen); + + return end_dt_vrf_rcv(skb, info->family, vrf); + +drop: + kfree_skb(skb); + return ERR_PTR(-EINVAL); +} + +static int input_action_end_dt4(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct iphdr *iph; + int err; + + if (!decap_and_validate(skb, IPPROTO_IPIP)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto drop; + + skb = end_dt_vrf_core(skb, slwt); + if (!skb) + /* packet has been processed and consumed by the VRF */ + return 0; + + if (IS_ERR(skb)) + return PTR_ERR(skb); + + iph = ip_hdr(skb); + + err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev); + if (unlikely(err)) + goto drop; + + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack); +} + +static enum +seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt) +{ + unsigned long parsed_optattrs = slwt->parsed_optattrs; + bool legacy, vrfmode; + + legacy = !!(parsed_optattrs & (1 << SEG6_LOCAL_TABLE)); + vrfmode = !!(parsed_optattrs & (1 << SEG6_LOCAL_VRFTABLE)); + + if (!(legacy ^ vrfmode)) + /* both are absent or present: invalid DT6 mode */ + return DT_INVALID_MODE; + + return legacy ? DT_LEGACY_MODE : DT_VRF_MODE; +} + +static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = &slwt->dt_info; + + return info->mode; +} + +static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt); + struct seg6_end_dt_info *info = &slwt->dt_info; + + switch (mode) { + case DT_LEGACY_MODE: + info->mode = DT_LEGACY_MODE; + return 0; + case DT_VRF_MODE: + return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack); + default: + NL_SET_ERR_MSG(extack, "table or vrftable must be specified"); + return -EINVAL; + } +} +#endif + static int input_action_end_dt6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { @@ -410,6 +705,28 @@ static int input_action_end_dt6(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; +#ifdef CONFIG_NET_L3_MASTER_DEV + if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE) + goto legacy_mode; + + /* DT6_VRF_MODE */ + skb = end_dt_vrf_core(skb, slwt); + if (!skb) + /* packet has been processed and consumed by the VRF */ + return 0; + + if (IS_ERR(skb)) + return PTR_ERR(skb); + + /* note: this time we do not need to specify the table because the VRF + * takes care of selecting the correct table. + */ + seg6_lookup_any_nexthop(skb, NULL, 0, true); + + return dst_input(skb); + +legacy_mode: +#endif skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_any_nexthop(skb, NULL, slwt->table, true); @@ -590,8 +907,27 @@ static struct seg6_action_desc seg6_action_table[] = { .input = input_action_end_dx4, }, { + .action = SEG6_LOCAL_ACTION_END_DT4, + .attrs = (1 << SEG6_LOCAL_VRFTABLE), +#ifdef CONFIG_NET_L3_MASTER_DEV + .input = input_action_end_dt4, + .slwt_ops = { + .build_state = seg6_end_dt4_build, + }, +#endif + }, + { .action = SEG6_LOCAL_ACTION_END_DT6, +#ifdef CONFIG_NET_L3_MASTER_DEV + .attrs = 0, + .optattrs = (1 << SEG6_LOCAL_TABLE) | + (1 << SEG6_LOCAL_VRFTABLE), + .slwt_ops = { + .build_state = seg6_end_dt6_build, + }, +#else .attrs = (1 << SEG6_LOCAL_TABLE), +#endif .input = input_action_end_dt6, }, { @@ -649,6 +985,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, [SEG6_LOCAL_TABLE] = { .type = NLA_U32 }, + [SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 }, [SEG6_LOCAL_NH4] = { .type = NLA_BINARY, .len = sizeof(struct in_addr) }, [SEG6_LOCAL_NH6] = { .type = NLA_BINARY, @@ -710,6 +1047,11 @@ static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return memcmp(a->srh, b->srh, len); } +static void destroy_attr_srh(struct seg6_local_lwt *slwt) +{ + kfree(slwt->srh); +} + static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt) { slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]); @@ -733,6 +1075,53 @@ static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return 0; } +static struct +seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt) +{ +#ifdef CONFIG_NET_L3_MASTER_DEV + return &slwt->dt_info; +#else + return ERR_PTR(-EOPNOTSUPP); +#endif +} + +static int parse_nla_vrftable(struct nlattr **attrs, + struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); + + if (IS_ERR(info)) + return PTR_ERR(info); + + info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]); + + return 0; +} + +static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); + + if (IS_ERR(info)) + return PTR_ERR(info); + + if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table)) + return -EMSGSIZE; + + return 0; +} + +static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a); + struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b); + + if (info_a->vrf_table != info_b->vrf_table) + return 1; + + return 0; +} + static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt) { memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]), @@ -901,16 +1290,30 @@ static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return strcmp(a->bpf.name, b->bpf.name); } +static void destroy_attr_bpf(struct seg6_local_lwt *slwt) +{ + kfree(slwt->bpf.name); + if (slwt->bpf.prog) + bpf_prog_put(slwt->bpf.prog); +} + struct seg6_action_param { int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b); + + /* optional destroy() callback useful for releasing resources which + * have been previously acquired in the corresponding parse() + * function. + */ + void (*destroy)(struct seg6_local_lwt *slwt); }; static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh, .put = put_nla_srh, - .cmp = cmp_nla_srh }, + .cmp = cmp_nla_srh, + .destroy = destroy_attr_srh }, [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table, .put = put_nla_table, @@ -934,14 +1337,130 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf, .put = put_nla_bpf, - .cmp = cmp_nla_bpf }, + .cmp = cmp_nla_bpf, + .destroy = destroy_attr_bpf }, + + [SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable, + .put = put_nla_vrftable, + .cmp = cmp_nla_vrftable }, }; +/* call the destroy() callback (if available) for each set attribute in + * @parsed_attrs, starting from the first attribute up to the @max_parsed + * (excluded) attribute. + */ +static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed, + struct seg6_local_lwt *slwt) +{ + struct seg6_action_param *param; + int i; + + /* Every required seg6local attribute is identified by an ID which is + * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask; + * + * We scan the 'parsed_attrs' bitmask, starting from the first attribute + * up to the @max_parsed (excluded) attribute. + * For each set attribute, we retrieve the corresponding destroy() + * callback. If the callback is not available, then we skip to the next + * attribute; otherwise, we call the destroy() callback. + */ + for (i = 0; i < max_parsed; ++i) { + if (!(parsed_attrs & (1 << i))) + continue; + + param = &seg6_action_params[i]; + + if (param->destroy) + param->destroy(slwt); + } +} + +/* release all the resources that may have been acquired during parsing + * operations. + */ +static void destroy_attrs(struct seg6_local_lwt *slwt) +{ + unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs; + + __destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt); +} + +static int parse_nla_optional_attrs(struct nlattr **attrs, + struct seg6_local_lwt *slwt) +{ + struct seg6_action_desc *desc = slwt->desc; + unsigned long parsed_optattrs = 0; + struct seg6_action_param *param; + int err, i; + + for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) { + if (!(desc->optattrs & (1 << i)) || !attrs[i]) + continue; + + /* once here, the i-th attribute is provided by the + * userspace AND it is identified optional as well. + */ + param = &seg6_action_params[i]; + + err = param->parse(attrs, slwt); + if (err < 0) + goto parse_optattrs_err; + + /* current attribute has been correctly parsed */ + parsed_optattrs |= (1 << i); + } + + /* store in the tunnel state all the optional attributed successfully + * parsed. + */ + slwt->parsed_optattrs = parsed_optattrs; + + return 0; + +parse_optattrs_err: + __destroy_attrs(parsed_optattrs, i, slwt); + + return err; +} + +/* call the custom constructor of the behavior during its initialization phase + * and after that all its attributes have been parsed successfully. + */ +static int +seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + struct seg6_action_desc *desc = slwt->desc; + struct seg6_local_lwtunnel_ops *ops; + + ops = &desc->slwt_ops; + if (!ops->build_state) + return 0; + + return ops->build_state(slwt, cfg, extack); +} + +/* call the custom destructor of the behavior which is invoked before the + * tunnel is going to be destroyed. + */ +static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt) +{ + struct seg6_action_desc *desc = slwt->desc; + struct seg6_local_lwtunnel_ops *ops; + + ops = &desc->slwt_ops; + if (!ops->destroy_state) + return; + + ops->destroy_state(slwt); +} + static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) { struct seg6_action_param *param; struct seg6_action_desc *desc; + unsigned long invalid_attrs; int i, err; desc = __get_action_desc(slwt->action); @@ -954,6 +1473,26 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) slwt->desc = desc; slwt->headroom += desc->static_headroom; + /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be + * disjoined, this allow us to release acquired resources by optional + * attributes and by required attributes independently from each other + * without any interfarence. + * In other terms, we are sure that we do not release some the acquired + * resources twice. + * + * Note that if an attribute is configured both as required and as + * optional, it means that the user has messed something up in the + * seg6_action_table. Therefore, this check is required for SRv6 + * behaviors to work properly. + */ + invalid_attrs = desc->attrs & desc->optattrs; + if (invalid_attrs) { + WARN_ONCE(1, + "An attribute cannot be both required AND optional"); + return -EINVAL; + } + + /* parse the required attributes */ for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { if (desc->attrs & (1 << i)) { if (!attrs[i]) @@ -963,11 +1502,24 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) err = param->parse(attrs, slwt); if (err < 0) - return err; + goto parse_attrs_err; } } + /* parse the optional attributes, if any */ + err = parse_nla_optional_attrs(attrs, slwt); + if (err < 0) + goto parse_attrs_err; + return 0; + +parse_attrs_err: + /* release any resource that may have been acquired during the i-1 + * parse() operations. + */ + __destroy_attrs(desc->attrs, i, slwt); + + return err; } static int seg6_local_build_state(struct net *net, struct nlattr *nla, @@ -1003,6 +1555,10 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla, if (err < 0) goto out_free; + err = seg6_local_lwtunnel_build_state(slwt, cfg, extack); + if (err < 0) + goto out_destroy_attrs; + newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL; newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT; newts->headroom = slwt->headroom; @@ -1011,8 +1567,9 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla, return 0; +out_destroy_attrs: + destroy_attrs(slwt); out_free: - kfree(slwt->srh); kfree(newts); return err; } @@ -1021,12 +1578,9 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt) { struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); - kfree(slwt->srh); + seg6_local_lwtunnel_destroy_state(slwt); - if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) { - kfree(slwt->bpf.name); - bpf_prog_put(slwt->bpf.prog); - } + destroy_attrs(slwt); return; } @@ -1036,13 +1590,16 @@ static int seg6_local_fill_encap(struct sk_buff *skb, { struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); struct seg6_action_param *param; + unsigned long attrs; int i, err; if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action)) return -EMSGSIZE; + attrs = slwt->desc->attrs | slwt->parsed_optattrs; + for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { - if (slwt->desc->attrs & (1 << i)) { + if (attrs & (1 << i)) { param = &seg6_action_params[i]; err = param->put(skb, slwt); if (err < 0) @@ -1061,7 +1618,7 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) nlsize = nla_total_size(4); /* action */ - attrs = slwt->desc->attrs; + attrs = slwt->desc->attrs | slwt->parsed_optattrs; if (attrs & (1 << SEG6_LOCAL_SRH)) nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3); @@ -1086,6 +1643,9 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) nla_total_size(MAX_PROG_NAME) + nla_total_size(4); + if (attrs & (1 << SEG6_LOCAL_VRFTABLE)) + nlsize += nla_total_size(4); + return nlsize; } @@ -1094,6 +1654,7 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a, { struct seg6_local_lwt *slwt_a, *slwt_b; struct seg6_action_param *param; + unsigned long attrs_a, attrs_b; int i; slwt_a = seg6_local_lwtunnel(a); @@ -1102,11 +1663,14 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a, if (slwt_a->action != slwt_b->action) return 1; - if (slwt_a->desc->attrs != slwt_b->desc->attrs) + attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs; + attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs; + + if (attrs_a != attrs_b) return 1; for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { - if (slwt_a->desc->attrs & (1 << i)) { + if (attrs_a & (1 << i)) { param = &seg6_action_params[i]; if (param->cmp(slwt_a, slwt_b)) return 1; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5e2c34c0ac97..2da0ee703779 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1128,7 +1128,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); - dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); dev->mtu = tdev->mtu - t_hlen; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1396,7 +1395,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipip6_tunnel_ctl, }; @@ -1426,7 +1425,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->priv_destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; - dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - t_hlen; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index e796a64be308..9b6cae1e49d9 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -136,7 +136,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; - int mss; + int full_space, mss; struct dst_entry *dst; __u8 rcv_wscale; u32 tsoff = 0; @@ -241,7 +241,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) } req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); - tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, + /* limit the window selection if the user enforce a smaller rx buffer */ + full_space = tcp_full_space(sk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; + + tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8db59f4e5f13..e254569a3005 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -527,15 +527,21 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (np->tclass & INET_ECN_MASK) : + np->tclass; + + if (!INET_ECN_is_capable(tclass) && + tcp_bpf_ca_needs_ecn((struct sock *)req)) + tclass |= INET_ECN_ECT_0; + rcu_read_lock(); opt = ireq->ipv6_opt; - tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos : np->tclass; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, - tclass & ~INET_ECN_MASK, - sk->sk_priority); + tclass, sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -823,9 +829,15 @@ static void tcp_v6_init_req(struct request_sock *req, } static struct dst_entry *tcp_v6_route_req(const struct sock *sk, + struct sk_buff *skb, struct flowi *fl, - const struct request_sock *req) + struct request_sock *req) { + tcp_v6_init_req(req, sk, skb); + + if (security_inet_conn_request(sk, skb, req)) + return NULL; + return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } @@ -846,7 +858,6 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif - .init_req = tcp_v6_init_req, #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif @@ -1193,6 +1204,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; + bool found_dup_sk = false; struct tcp_sock *newtp; struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG @@ -1314,7 +1326,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (np->repflow) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); - /* Set ToS of the new socket based upon the value of incoming SYN. */ + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; @@ -1368,7 +1382,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (*own_req) { tcp_move_syn(newtp, req); @@ -1383,6 +1398,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * skb_set_owner_r(newnp->pktoptions, newsk); } } + } else { + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case only + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } } return newsk; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 29d9691359b9..9008f5796ad4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -276,7 +276,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, inet6_sdif(skb), udptable, skb); } -struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, +struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { const struct ipv6hdr *iph = ipv6_hdr(skb); @@ -285,7 +285,6 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, &iph->daddr, dport, inet6_iif(skb), inet6_sdif(skb), &udp_table, NULL); } -EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); /* Must be called under rcu_read_lock(). * Does increment socket refcount. @@ -560,7 +559,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); - if (!sk) { + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udpv6_encap_needed_key)) { @@ -637,6 +636,9 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (rc == -ENOMEM) UDP6_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); + else + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_MEMERRORS, is_udplite); UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 584157a07759..c7bd7b1a04c1 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -28,10 +28,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int tnl_hlen; int err; - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features, true); @@ -48,6 +44,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) return __udp_gso_segment(skb, features); + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot * do checksum of UDP packets sent as multiple IP fragments. */ @@ -111,12 +111,22 @@ out: return segs; } +static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, + __be16 dport) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + + return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, + &iph->daddr, dport, inet6_iif(skb), + inet6_sdif(skb), &udp_table, NULL); +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); + struct sock *sk = NULL; struct sk_buff *pp; - struct sock *sk; if (unlikely(!uh)) goto flush; @@ -135,7 +145,10 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) skip: NAPI_GRO_CB(skb)->is_ipv6 = 1; rcu_read_lock(); - sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL; + + if (static_branch_unlikely(&udpv6_encap_needed_key)) + sk = udp6_gro_lookup_skb(skb, uh->source, uh->dest); + pp = udp_gro_receive(head, skb, uh, sk); rcu_read_unlock(); return pp; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 25b7ebda2fab..f696d46e6910 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -303,13 +303,13 @@ static const struct xfrm_type xfrm6_tunnel_type = { static struct xfrm6_tunnel xfrm6_tunnel_handler __read_mostly = { .handler = xfrm6_tunnel_rcv, .err_handler = xfrm6_tunnel_err, - .priority = 2, + .priority = 3, }; static struct xfrm6_tunnel xfrm46_tunnel_handler __read_mostly = { .handler = xfrm6_tunnel_rcv, .err_handler = xfrm6_tunnel_err, - .priority = 2, + .priority = 3, }; static int __net_init xfrm6_tunnel_net_init(struct net *net) |