diff options
author | David S. Miller <davem@davemloft.net> | 2021-07-21 18:21:39 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-07-21 18:21:39 +0300 |
commit | 7c804e91df523a37c29e183ea2b10ac73c3a4f3d (patch) | |
tree | 9c6f9f15af0b3bb0536720be5435ac711df55599 /net/ipv6 | |
parent | 71f4f89a0324459f81656f3f9b20c1c0becaf647 (diff) | |
parent | 968691c777af78d2daa2ee87cfaeeae825255a58 (diff) | |
download | linux-7c804e91df523a37c29e183ea2b10ac73c3a4f3d.tar.xz |
Merge branch 'ipv6-ioam'
Justin Iurman says:
====================
Support for the IOAM Pre-allocated Trace with IPv6
v5:
- Refine types, min/max and default values for new sysctls
- Introduce a "_wide" sysctl for each "ioam6_id" sysctl
- Add more validation on headers before processing data
- RCU for sc <> ns pointers + appropriate accessors
- Generic Netlink policies are now per op, not per family anymore
- Address other comments/remarks from Jakub (thanks again)
- Revert "__packed" to "__attribute__((packed))" for uapi headers
- Add tests to cover the functionality added, as requested by David Ahern
v4:
- Address warnings from checkpatch (ignore errors related to unnamed bitfields
in the first patch)
- Use of hweight32 (thanks Jakub)
- Remove inline keyword from static functions in C files and let the compiler
decide what to do (thanks Jakub)
v3:
- Fix warning "unused label 'out_unregister_genl'" by adding conditional macro
- Fix lwtunnel output redirect bug: dst cache useless in this case, use
orig_output instead
v2:
- Fix warning with static for __ioam6_fill_trace_data
- Fix sparse warning with __force when casting __be64 to __be32
- Fix unchecked dereference when removing IOAM namespaces or schemas
- exthdrs.c: Don't drop by default (now: ignore) to match the act bits "00"
- Add control plane support for the inline insertion (lwtunnel)
- Provide uapi structures
- Use __net_timestamp if skb->tstamp is empty
- Add note about the temporary IANA allocation
- Remove support for "removable" TLVs
- Remove support for virtual/anonymous tunnel decapsulation
In-situ Operations, Administration, and Maintenance (IOAM) records
operational and telemetry information in a packet while it traverses
a path between two points in an IOAM domain. It is defined in
draft-ietf-ippm-ioam-data [1]. IOAM data fields can be encapsulated
into a variety of protocols. The IPv6 encapsulation is defined in
draft-ietf-ippm-ioam-ipv6-options [2], via extension headers. IOAM
can be used to complement OAM mechanisms based on e.g. ICMP or other
types of probe packets.
This patchset implements support for the Pre-allocated Trace, carried
by a Hop-by-Hop. Therefore, a new IPv6 Hop-by-Hop TLV option is
introduced, see IANA [3]. The three other IOAM options are not included
in this patchset (Incremental Trace, Proof-of-Transit and Edge-to-Edge).
The main idea behind the IOAM Pre-allocated Trace is that a node
pre-allocates some room in packets for IOAM data. Then, each IOAM node
on the path will insert its data. There exist several interesting use-
cases, e.g. Fast failure detection/isolation or Smart service selection.
Another killer use-case is what we have called Cross-Layer Telemetry,
see the demo video on its repository [4], that aims to make the entire
stack (L2/L3 -> L7) visible for distributed tracing tools (e.g. Jaeger),
instead of the current L5 -> L7 limited view. So, basically, this is a
nice feature for the Linux Kernel.
This patchset also provides support for the control plane part, but only for the
inline insertion (host-to-host use case), through lightweight tunnels. Indeed,
for in-transit traffic, the solution is to have an IPv6-in-IPv6 encapsulation,
which brings some difficulties and still requires a little bit of work and
discussion (ie anonymous tunnel decapsulation and multi egress resolution).
- Patch 1: IPv6 IOAM headers definition
- Patch 2: Data plane support for Pre-allocated Trace
- Patch 3: IOAM Generic Netlink API
- Patch 4: Support for IOAM injection with lwtunnels
- Patch 5: Documentation for new IOAM sysctls
- Patch 6: Test for the IOAM insertion with IPv6
[1] https://tools.ietf.org/html/draft-ietf-ippm-ioam-data
[2] https://tools.ietf.org/html/draft-ietf-ippm-ioam-ipv6-options
[3] https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml#ipv6-parameters-2
[4] https://github.com/iurmanj/cross-layer-telemetry
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/Kconfig | 11 | ||||
-rw-r--r-- | net/ipv6/Makefile | 3 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 37 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 10 | ||||
-rw-r--r-- | net/ipv6/exthdrs.c | 61 | ||||
-rw-r--r-- | net/ipv6/ioam6.c | 910 | ||||
-rw-r--r-- | net/ipv6/ioam6_iptunnel.c | 274 | ||||
-rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 19 |
8 files changed, 1324 insertions, 1 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 747f56e0c636..e504204bca92 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL If unsure, say N. +config IPV6_IOAM6_LWTUNNEL + bool "IPv6: IOAM Pre-allocated Trace insertion support" + depends on IPV6 + select LWTUNNEL + help + Support for the inline insertion of IOAM Pre-allocated + Trace Header (only on locally generated packets), using + the lightweight tunnels mechanism. + + If unsure, say N. + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index cf7b47bdb9b3..1bc7e143217b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o fib6_notifier.o rpl.o + udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o +ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bc330fffb4a8..1802287977f1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -89,12 +89,15 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/export.h> +#include <linux/ioam6.h> #define INFINITY_LIFE_TIME 0xFFFFFFFF #define IPV6_MAX_STRLEN \ sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255") +static u32 ioam6_if_id_max = U16_MAX; + static inline u32 cstamp_delta(unsigned long cstamp) { return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; @@ -237,6 +240,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, + .ioam6_enabled = 0, + .ioam6_id = IOAM6_DEFAULT_IF_ID, + .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -293,6 +299,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, + .ioam6_enabled = 0, + .ioam6_id = IOAM6_DEFAULT_IF_ID, + .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -5524,6 +5533,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled; + array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled; + array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; + array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; } static inline size_t inet6_ifla6_size(void) @@ -6931,6 +6943,31 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, { + .procname = "ioam6_enabled", + .data = &ipv6_devconf.ioam6_enabled, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)SYSCTL_ONE, + }, + { + .procname = "ioam6_id", + .data = &ipv6_devconf.ioam6_id, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)&ioam6_if_id_max, + }, + { + .procname = "ioam6_id_wide", + .data = &ipv6_devconf.ioam6_id_wide, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { /* sentinel */ } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2389ff702f51..d92c90d97763 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -62,6 +62,7 @@ #include <net/rpl.h> #include <net/compat.h> #include <net/xfrm.h> +#include <net/ioam6.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.fib_notify_on_flag_change = 0; atomic_set(&net->ipv6.fib6_sernum, 1); + net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID; + net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE; + err = ipv6_init_mibs(net); if (err) return err; @@ -1191,6 +1195,10 @@ static int __init inet6_init(void) if (err) goto rpl_fail; + err = ioam6_init(); + if (err) + goto ioam6_fail; + err = igmp6_late_init(); if (err) goto igmp6_late_err; @@ -1213,6 +1221,8 @@ sysctl_fail: igmp6_late_cleanup(); #endif igmp6_late_err: + ioam6_exit(); +ioam6_fail: rpl_exit(); rpl_fail: seg6_exit(); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 26882e165c9e..d897faa4e9e6 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -49,6 +49,9 @@ #include <net/seg6_hmac.h> #endif #include <net/rpl.h> +#include <linux/ioam6.h> +#include <net/ioam6.h> +#include <net/dst_metadata.h> #include <linux/uaccess.h> @@ -928,6 +931,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) return false; } +/* IOAM */ + +static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) +{ + struct ioam6_trace_hdr *trace; + struct ioam6_namespace *ns; + struct ioam6_hdr *hdr; + + /* Bad alignment (must be 4n-aligned) */ + if (optoff & 3) + goto drop; + + /* Ignore if IOAM is not enabled on ingress */ + if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled) + goto ignore; + + /* Truncated Option header */ + hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff); + if (hdr->opt_len < 2) + goto drop; + + switch (hdr->type) { + case IOAM6_TYPE_PREALLOC: + /* Truncated Pre-allocated Trace header */ + if (hdr->opt_len < 2 + sizeof(*trace)) + goto drop; + + /* Malformed Pre-allocated Trace header */ + trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr)); + if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4) + goto drop; + + /* Ignore if the IOAM namespace is unknown */ + ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id); + if (!ns) + goto ignore; + + if (!skb_valid_dst(skb)) + ip6_route_input(skb); + + ioam6_fill_trace_data(skb, ns, trace); + break; + default: + break; + } + +ignore: + return true; + +drop: + kfree_skb(skb); + return false; +} + /* Jumbo payload */ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) @@ -1000,6 +1057,10 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = { .func = ipv6_hop_ra, }, { + .type = IPV6_TLV_IOAM, + .func = ipv6_hop_ioam, + }, + { .type = IPV6_TLV_JUMBO, .func = ipv6_hop_jumbo, }, diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c new file mode 100644 index 000000000000..5e8961004832 --- /dev/null +++ b/net/ipv6/ioam6.c @@ -0,0 +1,910 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/net.h> +#include <linux/ioam6.h> +#include <linux/ioam6_genl.h> +#include <linux/rhashtable.h> + +#include <net/addrconf.h> +#include <net/genetlink.h> +#include <net/ioam6.h> + +static void ioam6_ns_release(struct ioam6_namespace *ns) +{ + kfree_rcu(ns, rcu); +} + +static void ioam6_sc_release(struct ioam6_schema *sc) +{ + kfree_rcu(sc, rcu); +} + +static void ioam6_free_ns(void *ptr, void *arg) +{ + struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr; + + if (ns) + ioam6_ns_release(ns); +} + +static void ioam6_free_sc(void *ptr, void *arg) +{ + struct ioam6_schema *sc = (struct ioam6_schema *)ptr; + + if (sc) + ioam6_sc_release(sc); +} + +static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct ioam6_namespace *ns = obj; + + return (ns->id != *(__be16 *)arg->key); +} + +static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct ioam6_schema *sc = obj; + + return (sc->id != *(u32 *)arg->key); +} + +static const struct rhashtable_params rht_ns_params = { + .key_len = sizeof(__be16), + .key_offset = offsetof(struct ioam6_namespace, id), + .head_offset = offsetof(struct ioam6_namespace, head), + .automatic_shrinking = true, + .obj_cmpfn = ioam6_ns_cmpfn, +}; + +static const struct rhashtable_params rht_sc_params = { + .key_len = sizeof(u32), + .key_offset = offsetof(struct ioam6_schema, id), + .head_offset = offsetof(struct ioam6_schema, head), + .automatic_shrinking = true, + .obj_cmpfn = ioam6_sc_cmpfn, +}; + +static struct genl_family ioam6_genl_family; + +static const struct nla_policy ioam6_genl_policy_addns[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, + [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 }, + [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 }, +}; + +static const struct nla_policy ioam6_genl_policy_delns[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, +}; + +static const struct nla_policy ioam6_genl_policy_addsc[] = { + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, + [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY, + .len = IOAM6_MAX_SCHEMA_DATA_LEN }, +}; + +static const struct nla_policy ioam6_genl_policy_delsc[] = { + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, +}; + +static const struct nla_policy ioam6_genl_policy_ns_sc[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, + [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG }, +}; + +static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + u64 data64; + u32 data32; + __be16 id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID]) + return -EINVAL; + + id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); + if (ns) { + err = -EEXIST; + goto out_unlock; + } + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) { + err = -ENOMEM; + goto out_unlock; + } + + ns->id = id; + + if (!info->attrs[IOAM6_ATTR_NS_DATA]) + data32 = IOAM6_U32_UNAVAILABLE; + else + data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]); + + if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE]) + data64 = IOAM6_U64_UNAVAILABLE; + else + data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]); + + ns->data = cpu_to_be32(data32); + ns->data_wide = cpu_to_be64(data64); + + err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head, + rht_ns_params); + if (err) + kfree(ns); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + struct ioam6_schema *sc; + __be16 id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID]) + return -EINVAL; + + id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); + if (!ns) { + err = -ENOENT; + goto out_unlock; + } + + sc = rcu_dereference_protected(ns->schema, + lockdep_is_held(&nsdata->lock)); + + err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head, + rht_ns_params); + if (err) + goto out_unlock; + + if (sc) + rcu_assign_pointer(sc->ns, NULL); + + ioam6_ns_release(ns); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns, + u32 portid, + u32 seq, + u32 flags, + struct sk_buff *skb, + u8 cmd) +{ + struct ioam6_schema *sc; + u64 data64; + u32 data32; + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + data32 = be32_to_cpu(ns->data); + data64 = be64_to_cpu(ns->data_wide); + + if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) || + (data32 != IOAM6_U32_UNAVAILABLE && + nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) || + (data64 != IOAM6_U64_UNAVAILABLE && + nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE, + data64, IOAM6_ATTR_PAD))) + goto nla_put_failure; + + rcu_read_lock(); + + sc = rcu_dereference(ns->schema); + if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) { + rcu_read_unlock(); + goto nla_put_failure; + } + + rcu_read_unlock(); + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ioam6_genl_dumpns_start(struct netlink_callback *cb) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&nsdata->namespaces, iter); + + return 0; +} + +static int ioam6_genl_dumpns_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + kfree(iter); + + return 0; +} + +static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rhashtable_iter *iter; + struct ioam6_namespace *ns; + int err; + + iter = (struct rhashtable_iter *)cb->args[0]; + rhashtable_walk_start(iter); + + for (;;) { + ns = rhashtable_walk_next(iter); + + if (IS_ERR(ns)) { + if (PTR_ERR(ns) == -EAGAIN) + continue; + err = PTR_ERR(ns); + goto done; + } else if (!ns) { + break; + } + + err = __ioam6_genl_dumpns_element(ns, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + skb, + IOAM6_CMD_DUMP_NAMESPACES); + if (err) + goto done; + } + + err = skb->len; + +done: + rhashtable_walk_stop(iter); + return err; +} + +static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + int len, len_aligned, err; + struct ioam6_schema *sc; + u32 id; + + if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA]) + return -EINVAL; + + id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); + if (sc) { + err = -EEXIST; + goto out_unlock; + } + + len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]); + len_aligned = ALIGN(len, 4); + + sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL); + if (!sc) { + err = -ENOMEM; + goto out_unlock; + } + + sc->id = id; + sc->len = len_aligned; + sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24)); + nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len); + + err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head, + rht_sc_params); + if (err) + goto free_sc; + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +free_sc: + kfree(sc); + goto out_unlock; +} + +static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + struct ioam6_schema *sc; + int err; + u32 id; + + if (!info->attrs[IOAM6_ATTR_SC_ID]) + return -EINVAL; + + id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); + if (!sc) { + err = -ENOENT; + goto out_unlock; + } + + ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock)); + + err = rhashtable_remove_fast(&nsdata->schemas, &sc->head, + rht_sc_params); + if (err) + goto out_unlock; + + if (ns) + rcu_assign_pointer(ns->schema, NULL); + + ioam6_sc_release(sc); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + struct ioam6_namespace *ns; + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) || + nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data)) + goto nla_put_failure; + + rcu_read_lock(); + + ns = rcu_dereference(sc->ns); + if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) { + rcu_read_unlock(); + goto nla_put_failure; + } + + rcu_read_unlock(); + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ioam6_genl_dumpsc_start(struct netlink_callback *cb) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&nsdata->schemas, iter); + + return 0; +} + +static int ioam6_genl_dumpsc_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + kfree(iter); + + return 0; +} + +static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rhashtable_iter *iter; + struct ioam6_schema *sc; + int err; + + iter = (struct rhashtable_iter *)cb->args[0]; + rhashtable_walk_start(iter); + + for (;;) { + sc = rhashtable_walk_next(iter); + + if (IS_ERR(sc)) { + if (PTR_ERR(sc) == -EAGAIN) + continue; + err = PTR_ERR(sc); + goto done; + } else if (!sc) { + break; + } + + err = __ioam6_genl_dumpsc_element(sc, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + skb, + IOAM6_CMD_DUMP_SCHEMAS); + if (err) + goto done; + } + + err = skb->len; + +done: + rhashtable_walk_stop(iter); + return err; +} + +static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_namespace *ns, *ns_ref; + struct ioam6_schema *sc, *sc_ref; + struct ioam6_pernet_data *nsdata; + __be16 ns_id; + u32 sc_id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID] || + (!info->attrs[IOAM6_ATTR_SC_ID] && + !info->attrs[IOAM6_ATTR_SC_NONE])) + return -EINVAL; + + ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params); + if (!ns) { + err = -ENOENT; + goto out_unlock; + } + + if (info->attrs[IOAM6_ATTR_SC_NONE]) { + sc = NULL; + } else { + sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id, + rht_sc_params); + if (!sc) { + err = -ENOENT; + goto out_unlock; + } + } + + sc_ref = rcu_dereference_protected(ns->schema, + lockdep_is_held(&nsdata->lock)); + if (sc_ref) + rcu_assign_pointer(sc_ref->ns, NULL); + rcu_assign_pointer(ns->schema, sc); + + if (sc) { + ns_ref = rcu_dereference_protected(sc->ns, + lockdep_is_held(&nsdata->lock)); + if (ns_ref) + rcu_assign_pointer(ns_ref->schema, NULL); + rcu_assign_pointer(sc->ns, ns); + } + + err = 0; + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static const struct genl_ops ioam6_genl_ops[] = { + { + .cmd = IOAM6_CMD_ADD_NAMESPACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_addns, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_addns, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1, + }, + { + .cmd = IOAM6_CMD_DEL_NAMESPACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_delns, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_delns, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1, + }, + { + .cmd = IOAM6_CMD_DUMP_NAMESPACES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .start = ioam6_genl_dumpns_start, + .dumpit = ioam6_genl_dumpns, + .done = ioam6_genl_dumpns_done, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = IOAM6_CMD_ADD_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_addsc, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_addsc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1, + }, + { + .cmd = IOAM6_CMD_DEL_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_delsc, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_delsc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1, + }, + { + .cmd = IOAM6_CMD_DUMP_SCHEMAS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .start = ioam6_genl_dumpsc_start, + .dumpit = ioam6_genl_dumpsc, + .done = ioam6_genl_dumpsc_done, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = IOAM6_CMD_NS_SET_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_ns_set_schema, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_ns_sc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1, + }, +}; + +static struct genl_family ioam6_genl_family __ro_after_init = { + .name = IOAM6_GENL_NAME, + .version = IOAM6_GENL_VERSION, + .netnsok = true, + .parallel_ops = true, + .ops = ioam6_genl_ops, + .n_ops = ARRAY_SIZE(ioam6_genl_ops), + .module = THIS_MODULE, +}; + +struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(net); + + return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); +} + +static void __ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace, + struct ioam6_schema *sc, + u8 sclen) +{ + struct __kernel_sock_timeval ts; + u64 raw64; + u32 raw32; + u16 raw16; + u8 *data; + u8 byte; + + data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4; + + /* hop_lim and node_id */ + if (trace->type.bit0) { + byte = ipv6_hdr(skb)->hop_limit; + if (skb->dev) + byte--; + + raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; + + *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); + data += sizeof(__be32); + } + + /* ingress_if_id and egress_if_id */ + if (trace->type.bit1) { + if (!skb->dev) + raw16 = IOAM6_U16_UNAVAILABLE; + else + raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id; + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw16 = IOAM6_U16_UNAVAILABLE; + else + raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id; + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); + } + + /* timestamp seconds */ + if (trace->type.bit2) { + if (!skb->dev) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + } else { + if (!skb->tstamp) + __net_timestamp(skb); + + skb_get_new_timestamp(skb, &ts); + *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); + } + data += sizeof(__be32); + } + + /* timestamp subseconds */ + if (trace->type.bit3) { + if (!skb->dev) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + } else { + if (!skb->tstamp) + __net_timestamp(skb); + + if (!trace->type.bit2) + skb_get_new_timestamp(skb, &ts); + + *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); + } + data += sizeof(__be32); + } + + /* transit delay */ + if (trace->type.bit4) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* namespace data */ + if (trace->type.bit5) { + *(__be32 *)data = ns->data; + data += sizeof(__be32); + } + + /* queue depth */ + if (trace->type.bit6) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* checksum complement */ + if (trace->type.bit7) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* hop_lim and node_id (wide) */ + if (trace->type.bit8) { + byte = ipv6_hdr(skb)->hop_limit; + if (skb->dev) + byte--; + + raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; + + *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); + data += sizeof(__be64); + } + + /* ingress_if_id and egress_if_id (wide) */ + if (trace->type.bit9) { + if (!skb->dev) + raw32 = IOAM6_U32_UNAVAILABLE; + else + raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide; + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw32 = IOAM6_U32_UNAVAILABLE; + else + raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide; + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); + } + + /* namespace data (wide) */ + if (trace->type.bit10) { + *(__be64 *)data = ns->data_wide; + data += sizeof(__be64); + } + + /* buffer occupancy */ + if (trace->type.bit11) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* opaque state snapshot */ + if (trace->type.bit22) { + if (!sc) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8); + } else { + *(__be32 *)data = sc->hdr; + data += sizeof(__be32); + + memcpy(data, sc->data, sc->len); + } + } +} + +/* called with rcu_read_lock() */ +void ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace) +{ + struct ioam6_schema *sc; + u8 sclen = 0; + + /* Skip if Overflow flag is set OR + * if an unknown type (bit 12-21) is set + */ + if (trace->overflow || + trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21) { + return; + } + + /* NodeLen does not include Opaque State Snapshot length. We need to + * take it into account if the corresponding bit is set (bit 22) and + * if the current IOAM namespace has an active schema attached to it + */ + sc = rcu_dereference(ns->schema); + if (trace->type.bit22) { + sclen = sizeof_field(struct ioam6_schema, hdr) / 4; + + if (sc) + sclen += sc->len / 4; + } + + /* If there is no space remaining, we set the Overflow flag and we + * skip without filling the trace + */ + if (!trace->remlen || trace->remlen < trace->nodelen + sclen) { + trace->overflow = 1; + return; + } + + __ioam6_fill_trace_data(skb, ns, trace, sc, sclen); + trace->remlen -= trace->nodelen + sclen; +} + +static int __net_init ioam6_net_init(struct net *net) +{ + struct ioam6_pernet_data *nsdata; + int err = -ENOMEM; + + nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL); + if (!nsdata) + goto out; + + mutex_init(&nsdata->lock); + net->ipv6.ioam6_data = nsdata; + + err = rhashtable_init(&nsdata->namespaces, &rht_ns_params); + if (err) + goto free_nsdata; + + err = rhashtable_init(&nsdata->schemas, &rht_sc_params); + if (err) + goto free_rht_ns; + +out: + return err; +free_rht_ns: + rhashtable_destroy(&nsdata->namespaces); +free_nsdata: + kfree(nsdata); + net->ipv6.ioam6_data = NULL; + goto out; +} + +static void __net_exit ioam6_net_exit(struct net *net) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(net); + + rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL); + rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL); + + kfree(nsdata); +} + +static struct pernet_operations ioam6_net_ops = { + .init = ioam6_net_init, + .exit = ioam6_net_exit, +}; + +int __init ioam6_init(void) +{ + int err = register_pernet_subsys(&ioam6_net_ops); + if (err) + goto out; + + err = genl_register_family(&ioam6_genl_family); + if (err) + goto out_unregister_pernet_subsys; + +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL + err = ioam6_iptunnel_init(); + if (err) + goto out_unregister_genl; +#endif + + pr_info("In-situ OAM (IOAM) with IPv6\n"); + +out: + return err; +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL +out_unregister_genl: + genl_unregister_family(&ioam6_genl_family); +#endif +out_unregister_pernet_subsys: + unregister_pernet_subsys(&ioam6_net_ops); + goto out; +} + +void ioam6_exit(void) +{ +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL + ioam6_iptunnel_exit(); +#endif + genl_unregister_family(&ioam6_genl_family); + unregister_pernet_subsys(&ioam6_net_ops); +} diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c new file mode 100644 index 000000000000..f9ee04541c17 --- /dev/null +++ b/net/ipv6/ioam6_iptunnel.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IPv6 IOAM Lightweight Tunnel implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/netlink.h> +#include <linux/in6.h> +#include <linux/ioam6.h> +#include <linux/ioam6_iptunnel.h> +#include <net/dst.h> +#include <net/sock.h> +#include <net/lwtunnel.h> +#include <net/ioam6.h> + +#define IOAM6_MASK_SHORT_FIELDS 0xff100000 +#define IOAM6_MASK_WIDE_FIELDS 0xe00000 + +struct ioam6_lwt_encap { + struct ipv6_hopopt_hdr eh; + u8 pad[2]; /* 2-octet padding for 4n-alignment */ + struct ioam6_hdr ioamh; + struct ioam6_trace_hdr traceh; +} __packed; + +struct ioam6_lwt { + struct ioam6_lwt_encap tuninfo; +}; + +static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt) +{ + return (struct ioam6_lwt *)lwt->data; +} + +static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt) +{ + return &ioam6_lwt_state(lwt)->tuninfo; +} + +static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt) +{ + return &(ioam6_lwt_state(lwt)->tuninfo.traceh); +} + +static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { + [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)), +}; + +static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype, + struct ioam6_trace_hdr *trace) +{ + struct ioam6_trace_hdr *data; + struct nlattr *nla; + int len; + + len = sizeof(*trace); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, trace, len); + + return 0; +} + +static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) +{ + u32 fields; + + if (!trace->type_be32 || !trace->remlen || + trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4) + return false; + + trace->nodelen = 0; + fields = be32_to_cpu(trace->type_be32); + + trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS) + * (sizeof(__be32) / 4); + trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS) + * (sizeof(__be64) / 4); + + return true; +} + +static int ioam6_build_state(struct net *net, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1]; + struct ioam6_lwt_encap *tuninfo; + struct ioam6_trace_hdr *trace; + struct lwtunnel_state *s; + int len_aligned; + int len, err; + + if (family != AF_INET6) + return -EINVAL; + + err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla, + ioam6_iptunnel_policy, extack); + if (err < 0) + return err; + + if (!tb[IOAM6_IPTUNNEL_TRACE]) { + NL_SET_ERR_MSG(extack, "missing trace"); + return -EINVAL; + } + + trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]); + if (!ioam6_validate_trace_hdr(trace)) { + NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE], + "invalid trace validation"); + return -EINVAL; + } + + len = sizeof(*tuninfo) + trace->remlen * 4; + len_aligned = ALIGN(len, 8); + + s = lwtunnel_state_alloc(len_aligned); + if (!s) + return -ENOMEM; + + tuninfo = ioam6_lwt_info(s); + tuninfo->eh.hdrlen = (len_aligned >> 3) - 1; + tuninfo->pad[0] = IPV6_TLV_PADN; + tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC; + tuninfo->ioamh.opt_type = IPV6_TLV_IOAM; + tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace) + + trace->remlen * 4; + + memcpy(&tuninfo->traceh, trace, sizeof(*trace)); + + len = len_aligned - len; + if (len == 1) { + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1; + } else if (len > 0) { + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN; + tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2; + } + + s->type = LWTUNNEL_ENCAP_IOAM6; + s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + + *ts = s; + + return 0; +} + +static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo) +{ + struct ioam6_trace_hdr *trace; + struct ipv6hdr *oldhdr, *hdr; + struct ioam6_namespace *ns; + int hdrlen, err; + + hdrlen = (tuninfo->eh.hdrlen + 1) << 3; + + err = skb_cow_head(skb, hdrlen + skb->mac_len); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + skb_pull(skb, sizeof(*oldhdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr)); + + skb_push(skb, sizeof(*oldhdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + memmove(hdr, oldhdr, sizeof(*oldhdr)); + tuninfo->eh.nexthdr = hdr->nexthdr; + + skb_set_transport_header(skb, sizeof(*hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen); + + memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen); + + hdr->nexthdr = NEXTHDR_HOP; + hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); + + trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) + + sizeof(struct ipv6_hopopt_hdr) + 2 + + sizeof(struct ioam6_hdr)); + + ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id); + if (ns) + ioam6_fill_trace_data(skb, ns, trace); + + return 0; +} + +static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate; + int err = -EINVAL; + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + /* Only for packets we send and + * that do not contain a Hop-by-Hop yet + */ + if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) + goto out; + + err = ioam6_do_inline(skb, ioam6_lwt_info(lwt)); + if (unlikely(err)) + goto drop; + + err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev)); + if (unlikely(err)) + goto drop; + +out: + return lwt->orig_output(net, sk, skb); + +drop: + kfree_skb(skb); + return err; +} + +static int ioam6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + + if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace)) + return -EMSGSIZE; + + return 0; +} + +static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + + return nla_total_size(sizeof(*trace)); +} + +static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct ioam6_trace_hdr *a_hdr = ioam6_trace(a); + struct ioam6_trace_hdr *b_hdr = ioam6_trace(b); + + return (a_hdr->namespace_id != b_hdr->namespace_id); +} + +static const struct lwtunnel_encap_ops ioam6_iptun_ops = { + .build_state = ioam6_build_state, + .output = ioam6_output, + .fill_encap = ioam6_fill_encap_info, + .get_encap_size = ioam6_encap_nlsize, + .cmp_encap = ioam6_encap_cmp, + .owner = THIS_MODULE, +}; + +int __init ioam6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); +} + +void ioam6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); +} diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d7cf26f730d7..d53dd142bf87 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -21,6 +21,7 @@ #ifdef CONFIG_NETLABEL #include <net/calipso.h> #endif +#include <linux/ioam6.h> static int two = 2; static int three = 3; @@ -28,6 +29,8 @@ static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static u32 rt6_multipath_hash_fields_all_mask = FIB_MULTIPATH_HASH_FIELD_ALL_MASK; +static u32 ioam6_id_max = IOAM6_DEFAULT_ID; +static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -196,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, + { + .procname = "ioam6_id", + .data = &init_net.ipv6.sysctl.ioam6_id, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra2 = &ioam6_id_max, + }, + { + .procname = "ioam6_id_wide", + .data = &init_net.ipv6.sysctl.ioam6_id_wide, + .maxlen = sizeof(u64), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra2 = &ioam6_id_wide_max, + }, { } }; |