diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrlabel.c | 26 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 2 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 16 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 2 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 14 | ||||
-rw-r--r-- | net/ipv6/rpl_iptunnel.c | 7 | ||||
-rw-r--r-- | net/ipv6/seg6_local.c | 590 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 40 |
8 files changed, 649 insertions, 48 deletions
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 642fc6ac13d2..8a22486cf270 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -306,7 +306,9 @@ static int ip6addrlbl_del(struct net *net, /* add default label */ static int __net_init ip6addrlbl_net_init(struct net *net) { - int err = 0; + struct ip6addrlbl_entry *p = NULL; + struct hlist_node *n; + int err; int i; ADDRLABEL(KERN_DEBUG "%s\n", __func__); @@ -315,14 +317,20 @@ static int __net_init ip6addrlbl_net_init(struct net *net) INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { - int ret = ip6addrlbl_add(net, - ip6addrlbl_init_table[i].prefix, - ip6addrlbl_init_table[i].prefixlen, - 0, - ip6addrlbl_init_table[i].label, 0); - /* XXX: should we free all rules when we catch an error? */ - if (ret && (!err || err != -ENOMEM)) - err = ret; + err = ip6addrlbl_add(net, + ip6addrlbl_init_table[i].prefix, + ip6addrlbl_init_table[i].prefixlen, + 0, + ip6addrlbl_init_table[i].label, 0); + if (err) + goto err_ip6addrlbl_add; + } + return 0; + +err_ip6addrlbl_add: + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { + hlist_del_rcu(&p->list); + kfree_rcu(p, rcu); } return err; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e648fbebb167..a7e3d170af51 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr); + err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr); if (err) return err; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8cf659994412..c3bc89b6b1a1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1133,8 +1133,13 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, return; if (rt->dst.dev) { - dev->needed_headroom = rt->dst.dev->hard_header_len + - t_hlen; + unsigned short dst_len = rt->dst.dev->hard_header_len + + t_hlen; + + if (t->dev->header_ops) + dev->hard_header_len = dst_len; + else + dev->needed_headroom = dst_len; if (set_mtu) { dev->mtu = rt->dst.dev->mtu - t_hlen; @@ -1159,7 +1164,12 @@ static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + + if (tunnel->dev->header_ops) + tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + else + tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + return t_hlen; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 43a894bf9a1b..a6804a7e34c1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1148,7 +1148,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; - msg.msg_control = optval; + msg.msg_control_user = optval; msg.msg_controllen = len; msg.msg_flags = flags; msg.msg_control_is_user = true; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e2119bfcf13..c4f532f4d311 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. */ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; @@ -807,7 +807,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -831,7 +831,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; const void *loc_cpu_entry; @@ -980,7 +980,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1035,7 +1035,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - struct xt_table_info *private = t->private; + struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1189,7 +1189,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1552,7 +1552,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 233da43dd8d7..ff691d9f4a04 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -190,18 +190,13 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt) { struct dst_entry *dst = skb_dst(skb); struct rpl_iptunnel_encap *tinfo; - int err = 0; if (skb->protocol != htons(ETH_P_IPV6)) return -EINVAL; tinfo = rpl_encap_lwtunnel(dst->lwtstate); - err = rpl_do_srh_inline(skb, rlwt, tinfo->srh); - if (err) - return err; - - return 0; + return rpl_do_srh_inline(skb, rlwt, tinfo->srh); } static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index eba23279912d..b07f7c1c82a4 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -33,11 +33,35 @@ struct seg6_local_lwt; +/* callbacks used for customizing the creation and destruction of a behavior */ +struct seg6_local_lwtunnel_ops { + int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack); + void (*destroy_state)(struct seg6_local_lwt *slwt); +}; + struct seg6_action_desc { int action; unsigned long attrs; + + /* The optattrs field is used for specifying all the optional + * attributes supported by a specific behavior. + * It means that if one of these attributes is not provided in the + * netlink message during the behavior creation, no errors will be + * returned to the userspace. + * + * Each attribute can be only of two types (mutually exclusive): + * 1) required or 2) optional. + * Every user MUST obey to this rule! If you set an attribute as + * required the same attribute CANNOT be set as optional and vice + * versa. + */ + unsigned long optattrs; + int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt); int static_headroom; + + struct seg6_local_lwtunnel_ops slwt_ops; }; struct bpf_lwt_prog { @@ -45,6 +69,28 @@ struct bpf_lwt_prog { char *name; }; +enum seg6_end_dt_mode { + DT_INVALID_MODE = -EINVAL, + DT_LEGACY_MODE = 0, + DT_VRF_MODE = 1, +}; + +struct seg6_end_dt_info { + enum seg6_end_dt_mode mode; + + struct net *net; + /* VRF device associated to the routing table used by the SRv6 + * End.DT4/DT6 behavior for routing IPv4/IPv6 packets. + */ + int vrf_ifindex; + int vrf_table; + + /* tunneled packet proto and family (IPv4 or IPv6) */ + __be16 proto; + u16 family; + int hdrlen; +}; + struct seg6_local_lwt { int action; struct ipv6_sr_hdr *srh; @@ -54,9 +100,16 @@ struct seg6_local_lwt { int iif; int oif; struct bpf_lwt_prog bpf; +#ifdef CONFIG_NET_L3_MASTER_DEV + struct seg6_end_dt_info dt_info; +#endif int headroom; struct seg6_action_desc *desc; + /* unlike the required attrs, we have to track the optional attributes + * that have been effectively parsed. + */ + unsigned long parsed_optattrs; }; static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt) @@ -401,6 +454,248 @@ drop: return -EINVAL; } +#ifdef CONFIG_NET_L3_MASTER_DEV +static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg) +{ + const struct nl_info *nli = &fib6_cfg->fc_nlinfo; + + return nli->nl_net; +} + +static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, + u16 family, struct netlink_ext_ack *extack) +{ + struct seg6_end_dt_info *info = &slwt->dt_info; + int vrf_ifindex; + struct net *net; + + net = fib6_config_get_net(cfg); + + /* note that vrf_table was already set by parse_nla_vrftable() */ + vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net, + info->vrf_table); + if (vrf_ifindex < 0) { + if (vrf_ifindex == -EPERM) { + NL_SET_ERR_MSG(extack, + "Strict mode for VRF is disabled"); + } else if (vrf_ifindex == -ENODEV) { + NL_SET_ERR_MSG(extack, + "Table has no associated VRF device"); + } else { + pr_debug("seg6local: SRv6 End.DT* creation error=%d\n", + vrf_ifindex); + } + + return vrf_ifindex; + } + + info->net = net; + info->vrf_ifindex = vrf_ifindex; + + switch (family) { + case AF_INET: + info->proto = htons(ETH_P_IP); + info->hdrlen = sizeof(struct iphdr); + break; + case AF_INET6: + info->proto = htons(ETH_P_IPV6); + info->hdrlen = sizeof(struct ipv6hdr); + break; + default: + return -EINVAL; + } + + info->family = family; + info->mode = DT_VRF_MODE; + + return 0; +} + +/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and + * routes the IPv4/IPv6 packet by looking at the configured routing table. + * + * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment + * Routing Header packets) from several interfaces and the outer IPv6 + * destination address (DA) is used for retrieving the specific instance of the + * End.DT4/DT6 behavior that should process the packets. + * + * However, the inner IPv4/IPv6 packet is not really bound to any receiving + * interface and thus the End.DT4/DT6 sets the VRF (associated with the + * corresponding routing table) as the *receiving* interface. + * In other words, the End.DT4/DT6 processes a packet as if it has been received + * directly by the VRF (and not by one of its slave devices, if any). + * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in + * according to the routing table configured by the End.DT4/DT6 instance. + * + * This design allows you to get some interesting features like: + * 1) the statistics on rx packets; + * 2) the possibility to install a packet sniffer on the receiving interface + * (the VRF one) for looking at the incoming packets; + * 3) the possibility to leverage the netfilter prerouting hook for the inner + * IPv4 packet. + * + * This function returns: + * - the sk_buff* when the VRF rcv handler has processed the packet correctly; + * - NULL when the skb is consumed by the VRF rcv handler; + * - a pointer which encodes a negative error number in case of error. + * Note that in this case, the function takes care of freeing the skb. + */ +static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family, + struct net_device *dev) +{ + /* based on l3mdev_ip_rcv; we are only interested in the master */ + if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev))) + goto drop; + + if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv)) + goto drop; + + /* the decap packet IPv4/IPv6 does not come with any mac header info. + * We must unset the mac header to allow the VRF device to rebuild it, + * just in case there is a sniffer attached on the device. + */ + skb_unset_mac_header(skb); + + skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family); + if (!skb) + /* the skb buffer was consumed by the handler */ + return NULL; + + /* when a packet is received by a VRF or by one of its slaves, the + * master device reference is set into the skb. + */ + if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex)) + goto drop; + + return skb; + +drop: + kfree_skb(skb); + return ERR_PTR(-EINVAL); +} + +static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb, + struct seg6_end_dt_info *info) +{ + int vrf_ifindex = info->vrf_ifindex; + struct net *net = info->net; + + if (unlikely(vrf_ifindex < 0)) + goto error; + + if (unlikely(!net_eq(dev_net(skb->dev), net))) + goto error; + + return dev_get_by_index_rcu(net, vrf_ifindex); + +error: + return NULL; +} + +static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = &slwt->dt_info; + struct net_device *vrf; + + vrf = end_dt_get_vrf_rcu(skb, info); + if (unlikely(!vrf)) + goto drop; + + skb->protocol = info->proto; + + skb_dst_drop(skb); + + skb_set_transport_header(skb, info->hdrlen); + + return end_dt_vrf_rcv(skb, info->family, vrf); + +drop: + kfree_skb(skb); + return ERR_PTR(-EINVAL); +} + +static int input_action_end_dt4(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct iphdr *iph; + int err; + + if (!decap_and_validate(skb, IPPROTO_IPIP)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto drop; + + skb = end_dt_vrf_core(skb, slwt); + if (!skb) + /* packet has been processed and consumed by the VRF */ + return 0; + + if (IS_ERR(skb)) + return PTR_ERR(skb); + + iph = ip_hdr(skb); + + err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev); + if (unlikely(err)) + goto drop; + + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack); +} + +static enum +seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt) +{ + unsigned long parsed_optattrs = slwt->parsed_optattrs; + bool legacy, vrfmode; + + legacy = !!(parsed_optattrs & (1 << SEG6_LOCAL_TABLE)); + vrfmode = !!(parsed_optattrs & (1 << SEG6_LOCAL_VRFTABLE)); + + if (!(legacy ^ vrfmode)) + /* both are absent or present: invalid DT6 mode */ + return DT_INVALID_MODE; + + return legacy ? DT_LEGACY_MODE : DT_VRF_MODE; +} + +static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = &slwt->dt_info; + + return info->mode; +} + +static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt); + struct seg6_end_dt_info *info = &slwt->dt_info; + + switch (mode) { + case DT_LEGACY_MODE: + info->mode = DT_LEGACY_MODE; + return 0; + case DT_VRF_MODE: + return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack); + default: + NL_SET_ERR_MSG(extack, "table or vrftable must be specified"); + return -EINVAL; + } +} +#endif + static int input_action_end_dt6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { @@ -410,6 +705,28 @@ static int input_action_end_dt6(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; +#ifdef CONFIG_NET_L3_MASTER_DEV + if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE) + goto legacy_mode; + + /* DT6_VRF_MODE */ + skb = end_dt_vrf_core(skb, slwt); + if (!skb) + /* packet has been processed and consumed by the VRF */ + return 0; + + if (IS_ERR(skb)) + return PTR_ERR(skb); + + /* note: this time we do not need to specify the table because the VRF + * takes care of selecting the correct table. + */ + seg6_lookup_any_nexthop(skb, NULL, 0, true); + + return dst_input(skb); + +legacy_mode: +#endif skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_any_nexthop(skb, NULL, slwt->table, true); @@ -590,8 +907,27 @@ static struct seg6_action_desc seg6_action_table[] = { .input = input_action_end_dx4, }, { + .action = SEG6_LOCAL_ACTION_END_DT4, + .attrs = (1 << SEG6_LOCAL_VRFTABLE), +#ifdef CONFIG_NET_L3_MASTER_DEV + .input = input_action_end_dt4, + .slwt_ops = { + .build_state = seg6_end_dt4_build, + }, +#endif + }, + { .action = SEG6_LOCAL_ACTION_END_DT6, +#ifdef CONFIG_NET_L3_MASTER_DEV + .attrs = 0, + .optattrs = (1 << SEG6_LOCAL_TABLE) | + (1 << SEG6_LOCAL_VRFTABLE), + .slwt_ops = { + .build_state = seg6_end_dt6_build, + }, +#else .attrs = (1 << SEG6_LOCAL_TABLE), +#endif .input = input_action_end_dt6, }, { @@ -649,6 +985,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, [SEG6_LOCAL_TABLE] = { .type = NLA_U32 }, + [SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 }, [SEG6_LOCAL_NH4] = { .type = NLA_BINARY, .len = sizeof(struct in_addr) }, [SEG6_LOCAL_NH6] = { .type = NLA_BINARY, @@ -710,6 +1047,11 @@ static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return memcmp(a->srh, b->srh, len); } +static void destroy_attr_srh(struct seg6_local_lwt *slwt) +{ + kfree(slwt->srh); +} + static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt) { slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]); @@ -733,6 +1075,53 @@ static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return 0; } +static struct +seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt) +{ +#ifdef CONFIG_NET_L3_MASTER_DEV + return &slwt->dt_info; +#else + return ERR_PTR(-EOPNOTSUPP); +#endif +} + +static int parse_nla_vrftable(struct nlattr **attrs, + struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); + + if (IS_ERR(info)) + return PTR_ERR(info); + + info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]); + + return 0; +} + +static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); + + if (IS_ERR(info)) + return PTR_ERR(info); + + if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table)) + return -EMSGSIZE; + + return 0; +} + +static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a); + struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b); + + if (info_a->vrf_table != info_b->vrf_table) + return 1; + + return 0; +} + static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt) { memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]), @@ -901,16 +1290,30 @@ static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return strcmp(a->bpf.name, b->bpf.name); } +static void destroy_attr_bpf(struct seg6_local_lwt *slwt) +{ + kfree(slwt->bpf.name); + if (slwt->bpf.prog) + bpf_prog_put(slwt->bpf.prog); +} + struct seg6_action_param { int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b); + + /* optional destroy() callback useful for releasing resources which + * have been previously acquired in the corresponding parse() + * function. + */ + void (*destroy)(struct seg6_local_lwt *slwt); }; static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh, .put = put_nla_srh, - .cmp = cmp_nla_srh }, + .cmp = cmp_nla_srh, + .destroy = destroy_attr_srh }, [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table, .put = put_nla_table, @@ -934,14 +1337,130 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf, .put = put_nla_bpf, - .cmp = cmp_nla_bpf }, + .cmp = cmp_nla_bpf, + .destroy = destroy_attr_bpf }, + + [SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable, + .put = put_nla_vrftable, + .cmp = cmp_nla_vrftable }, }; +/* call the destroy() callback (if available) for each set attribute in + * @parsed_attrs, starting from the first attribute up to the @max_parsed + * (excluded) attribute. + */ +static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed, + struct seg6_local_lwt *slwt) +{ + struct seg6_action_param *param; + int i; + + /* Every required seg6local attribute is identified by an ID which is + * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask; + * + * We scan the 'parsed_attrs' bitmask, starting from the first attribute + * up to the @max_parsed (excluded) attribute. + * For each set attribute, we retrieve the corresponding destroy() + * callback. If the callback is not available, then we skip to the next + * attribute; otherwise, we call the destroy() callback. + */ + for (i = 0; i < max_parsed; ++i) { + if (!(parsed_attrs & (1 << i))) + continue; + + param = &seg6_action_params[i]; + + if (param->destroy) + param->destroy(slwt); + } +} + +/* release all the resources that may have been acquired during parsing + * operations. + */ +static void destroy_attrs(struct seg6_local_lwt *slwt) +{ + unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs; + + __destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt); +} + +static int parse_nla_optional_attrs(struct nlattr **attrs, + struct seg6_local_lwt *slwt) +{ + struct seg6_action_desc *desc = slwt->desc; + unsigned long parsed_optattrs = 0; + struct seg6_action_param *param; + int err, i; + + for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) { + if (!(desc->optattrs & (1 << i)) || !attrs[i]) + continue; + + /* once here, the i-th attribute is provided by the + * userspace AND it is identified optional as well. + */ + param = &seg6_action_params[i]; + + err = param->parse(attrs, slwt); + if (err < 0) + goto parse_optattrs_err; + + /* current attribute has been correctly parsed */ + parsed_optattrs |= (1 << i); + } + + /* store in the tunnel state all the optional attributed successfully + * parsed. + */ + slwt->parsed_optattrs = parsed_optattrs; + + return 0; + +parse_optattrs_err: + __destroy_attrs(parsed_optattrs, i, slwt); + + return err; +} + +/* call the custom constructor of the behavior during its initialization phase + * and after that all its attributes have been parsed successfully. + */ +static int +seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + struct seg6_action_desc *desc = slwt->desc; + struct seg6_local_lwtunnel_ops *ops; + + ops = &desc->slwt_ops; + if (!ops->build_state) + return 0; + + return ops->build_state(slwt, cfg, extack); +} + +/* call the custom destructor of the behavior which is invoked before the + * tunnel is going to be destroyed. + */ +static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt) +{ + struct seg6_action_desc *desc = slwt->desc; + struct seg6_local_lwtunnel_ops *ops; + + ops = &desc->slwt_ops; + if (!ops->destroy_state) + return; + + ops->destroy_state(slwt); +} + static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) { struct seg6_action_param *param; struct seg6_action_desc *desc; + unsigned long invalid_attrs; int i, err; desc = __get_action_desc(slwt->action); @@ -954,6 +1473,26 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) slwt->desc = desc; slwt->headroom += desc->static_headroom; + /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be + * disjoined, this allow us to release acquired resources by optional + * attributes and by required attributes independently from each other + * without any interfarence. + * In other terms, we are sure that we do not release some the acquired + * resources twice. + * + * Note that if an attribute is configured both as required and as + * optional, it means that the user has messed something up in the + * seg6_action_table. Therefore, this check is required for SRv6 + * behaviors to work properly. + */ + invalid_attrs = desc->attrs & desc->optattrs; + if (invalid_attrs) { + WARN_ONCE(1, + "An attribute cannot be both required AND optional"); + return -EINVAL; + } + + /* parse the required attributes */ for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { if (desc->attrs & (1 << i)) { if (!attrs[i]) @@ -963,11 +1502,24 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) err = param->parse(attrs, slwt); if (err < 0) - return err; + goto parse_attrs_err; } } + /* parse the optional attributes, if any */ + err = parse_nla_optional_attrs(attrs, slwt); + if (err < 0) + goto parse_attrs_err; + return 0; + +parse_attrs_err: + /* release any resource that may have been acquired during the i-1 + * parse() operations. + */ + __destroy_attrs(desc->attrs, i, slwt); + + return err; } static int seg6_local_build_state(struct net *net, struct nlattr *nla, @@ -1003,6 +1555,10 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla, if (err < 0) goto out_free; + err = seg6_local_lwtunnel_build_state(slwt, cfg, extack); + if (err < 0) + goto out_destroy_attrs; + newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL; newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT; newts->headroom = slwt->headroom; @@ -1011,8 +1567,9 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla, return 0; +out_destroy_attrs: + destroy_attrs(slwt); out_free: - kfree(slwt->srh); kfree(newts); return err; } @@ -1021,12 +1578,9 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt) { struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); - kfree(slwt->srh); + seg6_local_lwtunnel_destroy_state(slwt); - if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) { - kfree(slwt->bpf.name); - bpf_prog_put(slwt->bpf.prog); - } + destroy_attrs(slwt); return; } @@ -1036,13 +1590,16 @@ static int seg6_local_fill_encap(struct sk_buff *skb, { struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); struct seg6_action_param *param; + unsigned long attrs; int i, err; if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action)) return -EMSGSIZE; + attrs = slwt->desc->attrs | slwt->parsed_optattrs; + for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { - if (slwt->desc->attrs & (1 << i)) { + if (attrs & (1 << i)) { param = &seg6_action_params[i]; err = param->put(skb, slwt); if (err < 0) @@ -1061,7 +1618,7 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) nlsize = nla_total_size(4); /* action */ - attrs = slwt->desc->attrs; + attrs = slwt->desc->attrs | slwt->parsed_optattrs; if (attrs & (1 << SEG6_LOCAL_SRH)) nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3); @@ -1086,6 +1643,9 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) nla_total_size(MAX_PROG_NAME) + nla_total_size(4); + if (attrs & (1 << SEG6_LOCAL_VRFTABLE)) + nlsize += nla_total_size(4); + return nlsize; } @@ -1094,6 +1654,7 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a, { struct seg6_local_lwt *slwt_a, *slwt_b; struct seg6_action_param *param; + unsigned long attrs_a, attrs_b; int i; slwt_a = seg6_local_lwtunnel(a); @@ -1102,11 +1663,14 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a, if (slwt_a->action != slwt_b->action) return 1; - if (slwt_a->desc->attrs != slwt_b->desc->attrs) + attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs; + attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs; + + if (attrs_a != attrs_b) return 1; for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { - if (slwt_a->desc->attrs & (1 << i)) { + if (attrs_a & (1 << i)) { param = &seg6_action_params[i]; if (param->cmp(slwt_a, slwt_b)) return 1; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8db59f4e5f13..e254569a3005 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -527,15 +527,21 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (np->tclass & INET_ECN_MASK) : + np->tclass; + + if (!INET_ECN_is_capable(tclass) && + tcp_bpf_ca_needs_ecn((struct sock *)req)) + tclass |= INET_ECN_ECT_0; + rcu_read_lock(); opt = ireq->ipv6_opt; - tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos : np->tclass; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, - tclass & ~INET_ECN_MASK, - sk->sk_priority); + tclass, sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -823,9 +829,15 @@ static void tcp_v6_init_req(struct request_sock *req, } static struct dst_entry *tcp_v6_route_req(const struct sock *sk, + struct sk_buff *skb, struct flowi *fl, - const struct request_sock *req) + struct request_sock *req) { + tcp_v6_init_req(req, sk, skb); + + if (security_inet_conn_request(sk, skb, req)) + return NULL; + return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } @@ -846,7 +858,6 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif - .init_req = tcp_v6_init_req, #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif @@ -1193,6 +1204,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; + bool found_dup_sk = false; struct tcp_sock *newtp; struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG @@ -1314,7 +1326,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (np->repflow) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); - /* Set ToS of the new socket based upon the value of incoming SYN. */ + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; @@ -1368,7 +1382,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (*own_req) { tcp_move_syn(newtp, req); @@ -1383,6 +1398,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * skb_set_owner_r(newnp->pktoptions, newsk); } } + } else { + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case only + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } } return newsk; |