summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-07-21 18:21:39 +0300
committerDavid S. Miller <davem@davemloft.net>2021-07-21 18:21:39 +0300
commit7c804e91df523a37c29e183ea2b10ac73c3a4f3d (patch)
tree9c6f9f15af0b3bb0536720be5435ac711df55599 /net/ipv6
parent71f4f89a0324459f81656f3f9b20c1c0becaf647 (diff)
parent968691c777af78d2daa2ee87cfaeeae825255a58 (diff)
downloadlinux-7c804e91df523a37c29e183ea2b10ac73c3a4f3d.tar.xz
Merge branch 'ipv6-ioam'
Justin Iurman says: ==================== Support for the IOAM Pre-allocated Trace with IPv6 v5: - Refine types, min/max and default values for new sysctls - Introduce a "_wide" sysctl for each "ioam6_id" sysctl - Add more validation on headers before processing data - RCU for sc <> ns pointers + appropriate accessors - Generic Netlink policies are now per op, not per family anymore - Address other comments/remarks from Jakub (thanks again) - Revert "__packed" to "__attribute__((packed))" for uapi headers - Add tests to cover the functionality added, as requested by David Ahern v4: - Address warnings from checkpatch (ignore errors related to unnamed bitfields in the first patch) - Use of hweight32 (thanks Jakub) - Remove inline keyword from static functions in C files and let the compiler decide what to do (thanks Jakub) v3: - Fix warning "unused label 'out_unregister_genl'" by adding conditional macro - Fix lwtunnel output redirect bug: dst cache useless in this case, use orig_output instead v2: - Fix warning with static for __ioam6_fill_trace_data - Fix sparse warning with __force when casting __be64 to __be32 - Fix unchecked dereference when removing IOAM namespaces or schemas - exthdrs.c: Don't drop by default (now: ignore) to match the act bits "00" - Add control plane support for the inline insertion (lwtunnel) - Provide uapi structures - Use __net_timestamp if skb->tstamp is empty - Add note about the temporary IANA allocation - Remove support for "removable" TLVs - Remove support for virtual/anonymous tunnel decapsulation In-situ Operations, Administration, and Maintenance (IOAM) records operational and telemetry information in a packet while it traverses a path between two points in an IOAM domain. It is defined in draft-ietf-ippm-ioam-data [1]. IOAM data fields can be encapsulated into a variety of protocols. The IPv6 encapsulation is defined in draft-ietf-ippm-ioam-ipv6-options [2], via extension headers. IOAM can be used to complement OAM mechanisms based on e.g. ICMP or other types of probe packets. This patchset implements support for the Pre-allocated Trace, carried by a Hop-by-Hop. Therefore, a new IPv6 Hop-by-Hop TLV option is introduced, see IANA [3]. The three other IOAM options are not included in this patchset (Incremental Trace, Proof-of-Transit and Edge-to-Edge). The main idea behind the IOAM Pre-allocated Trace is that a node pre-allocates some room in packets for IOAM data. Then, each IOAM node on the path will insert its data. There exist several interesting use- cases, e.g. Fast failure detection/isolation or Smart service selection. Another killer use-case is what we have called Cross-Layer Telemetry, see the demo video on its repository [4], that aims to make the entire stack (L2/L3 -> L7) visible for distributed tracing tools (e.g. Jaeger), instead of the current L5 -> L7 limited view. So, basically, this is a nice feature for the Linux Kernel. This patchset also provides support for the control plane part, but only for the inline insertion (host-to-host use case), through lightweight tunnels. Indeed, for in-transit traffic, the solution is to have an IPv6-in-IPv6 encapsulation, which brings some difficulties and still requires a little bit of work and discussion (ie anonymous tunnel decapsulation and multi egress resolution). - Patch 1: IPv6 IOAM headers definition - Patch 2: Data plane support for Pre-allocated Trace - Patch 3: IOAM Generic Netlink API - Patch 4: Support for IOAM injection with lwtunnels - Patch 5: Documentation for new IOAM sysctls - Patch 6: Test for the IOAM insertion with IPv6 [1] https://tools.ietf.org/html/draft-ietf-ippm-ioam-data [2] https://tools.ietf.org/html/draft-ietf-ippm-ioam-ipv6-options [3] https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml#ipv6-parameters-2 [4] https://github.com/iurmanj/cross-layer-telemetry ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig11
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c37
-rw-r--r--net/ipv6/af_inet6.c10
-rw-r--r--net/ipv6/exthdrs.c61
-rw-r--r--net/ipv6/ioam6.c910
-rw-r--r--net/ipv6/ioam6_iptunnel.c274
-rw-r--r--net/ipv6/sysctl_net_ipv6.c19
8 files changed, 1324 insertions, 1 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 747f56e0c636..e504204bca92 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL
If unsure, say N.
+config IPV6_IOAM6_LWTUNNEL
+ bool "IPv6: IOAM Pre-allocated Trace insertion support"
+ depends on IPV6
+ select LWTUNNEL
+ help
+ Support for the inline insertion of IOAM Pre-allocated
+ Trace Header (only on locally generated packets), using
+ the lightweight tunnels mechanism.
+
+ If unsure, say N.
+
endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index cf7b47bdb9b3..1bc7e143217b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o seg6.o fib6_notifier.o rpl.o
+ udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
@@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o
ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o
+ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index bc330fffb4a8..1802287977f1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -89,12 +89,15 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/export.h>
+#include <linux/ioam6.h>
#define INFINITY_LIFE_TIME 0xFFFFFFFF
#define IPV6_MAX_STRLEN \
sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255")
+static u32 ioam6_if_id_max = U16_MAX;
+
static inline u32 cstamp_delta(unsigned long cstamp)
{
return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
@@ -237,6 +240,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
.disable_policy = 0,
.rpl_seg_enabled = 0,
+ .ioam6_enabled = 0,
+ .ioam6_id = IOAM6_DEFAULT_IF_ID,
+ .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -293,6 +299,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
.disable_policy = 0,
.rpl_seg_enabled = 0,
+ .ioam6_enabled = 0,
+ .ioam6_id = IOAM6_DEFAULT_IF_ID,
+ .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -5524,6 +5533,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
+ array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
+ array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
+ array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
}
static inline size_t inet6_ifla6_size(void)
@@ -6931,6 +6943,31 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "ioam6_enabled",
+ .data = &ipv6_devconf.ioam6_enabled,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)SYSCTL_ONE,
+ },
+ {
+ .procname = "ioam6_id",
+ .data = &ipv6_devconf.ioam6_id,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)&ioam6_if_id_max,
+ },
+ {
+ .procname = "ioam6_id_wide",
+ .data = &ipv6_devconf.ioam6_id_wide,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec,
+ },
+ {
/* sentinel */
}
};
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2389ff702f51..d92c90d97763 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -62,6 +62,7 @@
#include <net/rpl.h>
#include <net/compat.h>
#include <net/xfrm.h>
+#include <net/ioam6.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.fib_notify_on_flag_change = 0;
atomic_set(&net->ipv6.fib6_sernum, 1);
+ net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID;
+ net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE;
+
err = ipv6_init_mibs(net);
if (err)
return err;
@@ -1191,6 +1195,10 @@ static int __init inet6_init(void)
if (err)
goto rpl_fail;
+ err = ioam6_init();
+ if (err)
+ goto ioam6_fail;
+
err = igmp6_late_init();
if (err)
goto igmp6_late_err;
@@ -1213,6 +1221,8 @@ sysctl_fail:
igmp6_late_cleanup();
#endif
igmp6_late_err:
+ ioam6_exit();
+ioam6_fail:
rpl_exit();
rpl_fail:
seg6_exit();
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 26882e165c9e..d897faa4e9e6 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -49,6 +49,9 @@
#include <net/seg6_hmac.h>
#endif
#include <net/rpl.h>
+#include <linux/ioam6.h>
+#include <net/ioam6.h>
+#include <net/dst_metadata.h>
#include <linux/uaccess.h>
@@ -928,6 +931,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
return false;
}
+/* IOAM */
+
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
+{
+ struct ioam6_trace_hdr *trace;
+ struct ioam6_namespace *ns;
+ struct ioam6_hdr *hdr;
+
+ /* Bad alignment (must be 4n-aligned) */
+ if (optoff & 3)
+ goto drop;
+
+ /* Ignore if IOAM is not enabled on ingress */
+ if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled)
+ goto ignore;
+
+ /* Truncated Option header */
+ hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff);
+ if (hdr->opt_len < 2)
+ goto drop;
+
+ switch (hdr->type) {
+ case IOAM6_TYPE_PREALLOC:
+ /* Truncated Pre-allocated Trace header */
+ if (hdr->opt_len < 2 + sizeof(*trace))
+ goto drop;
+
+ /* Malformed Pre-allocated Trace header */
+ trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr));
+ if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4)
+ goto drop;
+
+ /* Ignore if the IOAM namespace is unknown */
+ ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id);
+ if (!ns)
+ goto ignore;
+
+ if (!skb_valid_dst(skb))
+ ip6_route_input(skb);
+
+ ioam6_fill_trace_data(skb, ns, trace);
+ break;
+ default:
+ break;
+ }
+
+ignore:
+ return true;
+
+drop:
+ kfree_skb(skb);
+ return false;
+}
+
/* Jumbo payload */
static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
@@ -1000,6 +1057,10 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = {
.func = ipv6_hop_ra,
},
{
+ .type = IPV6_TLV_IOAM,
+ .func = ipv6_hop_ioam,
+ },
+ {
.type = IPV6_TLV_JUMBO,
.func = ipv6_hop_jumbo,
},
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
new file mode 100644
index 000000000000..5e8961004832
--- /dev/null
+++ b/net/ipv6/ioam6.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IPv6 IOAM implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
+#include <linux/rhashtable.h>
+
+#include <net/addrconf.h>
+#include <net/genetlink.h>
+#include <net/ioam6.h>
+
+static void ioam6_ns_release(struct ioam6_namespace *ns)
+{
+ kfree_rcu(ns, rcu);
+}
+
+static void ioam6_sc_release(struct ioam6_schema *sc)
+{
+ kfree_rcu(sc, rcu);
+}
+
+static void ioam6_free_ns(void *ptr, void *arg)
+{
+ struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr;
+
+ if (ns)
+ ioam6_ns_release(ns);
+}
+
+static void ioam6_free_sc(void *ptr, void *arg)
+{
+ struct ioam6_schema *sc = (struct ioam6_schema *)ptr;
+
+ if (sc)
+ ioam6_sc_release(sc);
+}
+
+static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct ioam6_namespace *ns = obj;
+
+ return (ns->id != *(__be16 *)arg->key);
+}
+
+static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct ioam6_schema *sc = obj;
+
+ return (sc->id != *(u32 *)arg->key);
+}
+
+static const struct rhashtable_params rht_ns_params = {
+ .key_len = sizeof(__be16),
+ .key_offset = offsetof(struct ioam6_namespace, id),
+ .head_offset = offsetof(struct ioam6_namespace, head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = ioam6_ns_cmpfn,
+};
+
+static const struct rhashtable_params rht_sc_params = {
+ .key_len = sizeof(u32),
+ .key_offset = offsetof(struct ioam6_schema, id),
+ .head_offset = offsetof(struct ioam6_schema, head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = ioam6_sc_cmpfn,
+};
+
+static struct genl_family ioam6_genl_family;
+
+static const struct nla_policy ioam6_genl_policy_addns[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+ [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 },
+ [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 },
+};
+
+static const struct nla_policy ioam6_genl_policy_delns[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+};
+
+static const struct nla_policy ioam6_genl_policy_addsc[] = {
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+ [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY,
+ .len = IOAM6_MAX_SCHEMA_DATA_LEN },
+};
+
+static const struct nla_policy ioam6_genl_policy_delsc[] = {
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy ioam6_genl_policy_ns_sc[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+ [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG },
+};
+
+static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ u64 data64;
+ u32 data32;
+ __be16 id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID])
+ return -EINVAL;
+
+ id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+ if (ns) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ ns->id = id;
+
+ if (!info->attrs[IOAM6_ATTR_NS_DATA])
+ data32 = IOAM6_U32_UNAVAILABLE;
+ else
+ data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]);
+
+ if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE])
+ data64 = IOAM6_U64_UNAVAILABLE;
+ else
+ data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]);
+
+ ns->data = cpu_to_be32(data32);
+ ns->data_wide = cpu_to_be64(data64);
+
+ err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head,
+ rht_ns_params);
+ if (err)
+ kfree(ns);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ struct ioam6_schema *sc;
+ __be16 id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID])
+ return -EINVAL;
+
+ id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+ if (!ns) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ sc = rcu_dereference_protected(ns->schema,
+ lockdep_is_held(&nsdata->lock));
+
+ err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head,
+ rht_ns_params);
+ if (err)
+ goto out_unlock;
+
+ if (sc)
+ rcu_assign_pointer(sc->ns, NULL);
+
+ ioam6_ns_release(ns);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns,
+ u32 portid,
+ u32 seq,
+ u32 flags,
+ struct sk_buff *skb,
+ u8 cmd)
+{
+ struct ioam6_schema *sc;
+ u64 data64;
+ u32 data32;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ data32 = be32_to_cpu(ns->data);
+ data64 = be64_to_cpu(ns->data_wide);
+
+ if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) ||
+ (data32 != IOAM6_U32_UNAVAILABLE &&
+ nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) ||
+ (data64 != IOAM6_U64_UNAVAILABLE &&
+ nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE,
+ data64, IOAM6_ATTR_PAD)))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+
+ sc = rcu_dereference(ns->schema);
+ if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+
+ rcu_read_unlock();
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpns_start(struct netlink_callback *cb)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&nsdata->namespaces, iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpns_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+ kfree(iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter;
+ struct ioam6_namespace *ns;
+ int err;
+
+ iter = (struct rhashtable_iter *)cb->args[0];
+ rhashtable_walk_start(iter);
+
+ for (;;) {
+ ns = rhashtable_walk_next(iter);
+
+ if (IS_ERR(ns)) {
+ if (PTR_ERR(ns) == -EAGAIN)
+ continue;
+ err = PTR_ERR(ns);
+ goto done;
+ } else if (!ns) {
+ break;
+ }
+
+ err = __ioam6_genl_dumpns_element(ns,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb,
+ IOAM6_CMD_DUMP_NAMESPACES);
+ if (err)
+ goto done;
+ }
+
+ err = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return err;
+}
+
+static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ int len, len_aligned, err;
+ struct ioam6_schema *sc;
+ u32 id;
+
+ if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA])
+ return -EINVAL;
+
+ id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+ if (sc) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]);
+ len_aligned = ALIGN(len, 4);
+
+ sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL);
+ if (!sc) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ sc->id = id;
+ sc->len = len_aligned;
+ sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24));
+ nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len);
+
+ err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head,
+ rht_sc_params);
+ if (err)
+ goto free_sc;
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+free_sc:
+ kfree(sc);
+ goto out_unlock;
+}
+
+static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ struct ioam6_schema *sc;
+ int err;
+ u32 id;
+
+ if (!info->attrs[IOAM6_ATTR_SC_ID])
+ return -EINVAL;
+
+ id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+ if (!sc) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock));
+
+ err = rhashtable_remove_fast(&nsdata->schemas, &sc->head,
+ rht_sc_params);
+ if (err)
+ goto out_unlock;
+
+ if (ns)
+ rcu_assign_pointer(ns->schema, NULL);
+
+ ioam6_sc_release(sc);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc,
+ u32 portid, u32 seq, u32 flags,
+ struct sk_buff *skb, u8 cmd)
+{
+ struct ioam6_namespace *ns;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) ||
+ nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+
+ ns = rcu_dereference(sc->ns);
+ if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+
+ rcu_read_unlock();
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpsc_start(struct netlink_callback *cb)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&nsdata->schemas, iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpsc_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+ kfree(iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter;
+ struct ioam6_schema *sc;
+ int err;
+
+ iter = (struct rhashtable_iter *)cb->args[0];
+ rhashtable_walk_start(iter);
+
+ for (;;) {
+ sc = rhashtable_walk_next(iter);
+
+ if (IS_ERR(sc)) {
+ if (PTR_ERR(sc) == -EAGAIN)
+ continue;
+ err = PTR_ERR(sc);
+ goto done;
+ } else if (!sc) {
+ break;
+ }
+
+ err = __ioam6_genl_dumpsc_element(sc,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb,
+ IOAM6_CMD_DUMP_SCHEMAS);
+ if (err)
+ goto done;
+ }
+
+ err = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return err;
+}
+
+static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_namespace *ns, *ns_ref;
+ struct ioam6_schema *sc, *sc_ref;
+ struct ioam6_pernet_data *nsdata;
+ __be16 ns_id;
+ u32 sc_id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID] ||
+ (!info->attrs[IOAM6_ATTR_SC_ID] &&
+ !info->attrs[IOAM6_ATTR_SC_NONE]))
+ return -EINVAL;
+
+ ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params);
+ if (!ns) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ if (info->attrs[IOAM6_ATTR_SC_NONE]) {
+ sc = NULL;
+ } else {
+ sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id,
+ rht_sc_params);
+ if (!sc) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+ }
+
+ sc_ref = rcu_dereference_protected(ns->schema,
+ lockdep_is_held(&nsdata->lock));
+ if (sc_ref)
+ rcu_assign_pointer(sc_ref->ns, NULL);
+ rcu_assign_pointer(ns->schema, sc);
+
+ if (sc) {
+ ns_ref = rcu_dereference_protected(sc->ns,
+ lockdep_is_held(&nsdata->lock));
+ if (ns_ref)
+ rcu_assign_pointer(ns_ref->schema, NULL);
+ rcu_assign_pointer(sc->ns, ns);
+ }
+
+ err = 0;
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static const struct genl_ops ioam6_genl_ops[] = {
+ {
+ .cmd = IOAM6_CMD_ADD_NAMESPACE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_addns,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_addns,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DEL_NAMESPACE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_delns,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_delns,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DUMP_NAMESPACES,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = ioam6_genl_dumpns_start,
+ .dumpit = ioam6_genl_dumpns,
+ .done = ioam6_genl_dumpns_done,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = IOAM6_CMD_ADD_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_addsc,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_addsc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DEL_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_delsc,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_delsc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DUMP_SCHEMAS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = ioam6_genl_dumpsc_start,
+ .dumpit = ioam6_genl_dumpsc,
+ .done = ioam6_genl_dumpsc_done,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = IOAM6_CMD_NS_SET_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_ns_set_schema,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_ns_sc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1,
+ },
+};
+
+static struct genl_family ioam6_genl_family __ro_after_init = {
+ .name = IOAM6_GENL_NAME,
+ .version = IOAM6_GENL_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = ioam6_genl_ops,
+ .n_ops = ARRAY_SIZE(ioam6_genl_ops),
+ .module = THIS_MODULE,
+};
+
+struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+ return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+}
+
+static void __ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace,
+ struct ioam6_schema *sc,
+ u8 sclen)
+{
+ struct __kernel_sock_timeval ts;
+ u64 raw64;
+ u32 raw32;
+ u16 raw16;
+ u8 *data;
+ u8 byte;
+
+ data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4;
+
+ /* hop_lim and node_id */
+ if (trace->type.bit0) {
+ byte = ipv6_hdr(skb)->hop_limit;
+ if (skb->dev)
+ byte--;
+
+ raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id;
+
+ *(__be32 *)data = cpu_to_be32((byte << 24) | raw32);
+ data += sizeof(__be32);
+ }
+
+ /* ingress_if_id and egress_if_id */
+ if (trace->type.bit1) {
+ if (!skb->dev)
+ raw16 = IOAM6_U16_UNAVAILABLE;
+ else
+ raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id;
+
+ *(__be16 *)data = cpu_to_be16(raw16);
+ data += sizeof(__be16);
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ raw16 = IOAM6_U16_UNAVAILABLE;
+ else
+ raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id;
+
+ *(__be16 *)data = cpu_to_be16(raw16);
+ data += sizeof(__be16);
+ }
+
+ /* timestamp seconds */
+ if (trace->type.bit2) {
+ if (!skb->dev) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ } else {
+ if (!skb->tstamp)
+ __net_timestamp(skb);
+
+ skb_get_new_timestamp(skb, &ts);
+ *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec);
+ }
+ data += sizeof(__be32);
+ }
+
+ /* timestamp subseconds */
+ if (trace->type.bit3) {
+ if (!skb->dev) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ } else {
+ if (!skb->tstamp)
+ __net_timestamp(skb);
+
+ if (!trace->type.bit2)
+ skb_get_new_timestamp(skb, &ts);
+
+ *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec);
+ }
+ data += sizeof(__be32);
+ }
+
+ /* transit delay */
+ if (trace->type.bit4) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* namespace data */
+ if (trace->type.bit5) {
+ *(__be32 *)data = ns->data;
+ data += sizeof(__be32);
+ }
+
+ /* queue depth */
+ if (trace->type.bit6) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* checksum complement */
+ if (trace->type.bit7) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* hop_lim and node_id (wide) */
+ if (trace->type.bit8) {
+ byte = ipv6_hdr(skb)->hop_limit;
+ if (skb->dev)
+ byte--;
+
+ raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide;
+
+ *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64);
+ data += sizeof(__be64);
+ }
+
+ /* ingress_if_id and egress_if_id (wide) */
+ if (trace->type.bit9) {
+ if (!skb->dev)
+ raw32 = IOAM6_U32_UNAVAILABLE;
+ else
+ raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide;
+
+ *(__be32 *)data = cpu_to_be32(raw32);
+ data += sizeof(__be32);
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ raw32 = IOAM6_U32_UNAVAILABLE;
+ else
+ raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide;
+
+ *(__be32 *)data = cpu_to_be32(raw32);
+ data += sizeof(__be32);
+ }
+
+ /* namespace data (wide) */
+ if (trace->type.bit10) {
+ *(__be64 *)data = ns->data_wide;
+ data += sizeof(__be64);
+ }
+
+ /* buffer occupancy */
+ if (trace->type.bit11) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* opaque state snapshot */
+ if (trace->type.bit22) {
+ if (!sc) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8);
+ } else {
+ *(__be32 *)data = sc->hdr;
+ data += sizeof(__be32);
+
+ memcpy(data, sc->data, sc->len);
+ }
+ }
+}
+
+/* called with rcu_read_lock() */
+void ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace)
+{
+ struct ioam6_schema *sc;
+ u8 sclen = 0;
+
+ /* Skip if Overflow flag is set OR
+ * if an unknown type (bit 12-21) is set
+ */
+ if (trace->overflow ||
+ trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
+ trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
+ trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
+ trace->type.bit21) {
+ return;
+ }
+
+ /* NodeLen does not include Opaque State Snapshot length. We need to
+ * take it into account if the corresponding bit is set (bit 22) and
+ * if the current IOAM namespace has an active schema attached to it
+ */
+ sc = rcu_dereference(ns->schema);
+ if (trace->type.bit22) {
+ sclen = sizeof_field(struct ioam6_schema, hdr) / 4;
+
+ if (sc)
+ sclen += sc->len / 4;
+ }
+
+ /* If there is no space remaining, we set the Overflow flag and we
+ * skip without filling the trace
+ */
+ if (!trace->remlen || trace->remlen < trace->nodelen + sclen) {
+ trace->overflow = 1;
+ return;
+ }
+
+ __ioam6_fill_trace_data(skb, ns, trace, sc, sclen);
+ trace->remlen -= trace->nodelen + sclen;
+}
+
+static int __net_init ioam6_net_init(struct net *net)
+{
+ struct ioam6_pernet_data *nsdata;
+ int err = -ENOMEM;
+
+ nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL);
+ if (!nsdata)
+ goto out;
+
+ mutex_init(&nsdata->lock);
+ net->ipv6.ioam6_data = nsdata;
+
+ err = rhashtable_init(&nsdata->namespaces, &rht_ns_params);
+ if (err)
+ goto free_nsdata;
+
+ err = rhashtable_init(&nsdata->schemas, &rht_sc_params);
+ if (err)
+ goto free_rht_ns;
+
+out:
+ return err;
+free_rht_ns:
+ rhashtable_destroy(&nsdata->namespaces);
+free_nsdata:
+ kfree(nsdata);
+ net->ipv6.ioam6_data = NULL;
+ goto out;
+}
+
+static void __net_exit ioam6_net_exit(struct net *net)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+ rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL);
+ rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL);
+
+ kfree(nsdata);
+}
+
+static struct pernet_operations ioam6_net_ops = {
+ .init = ioam6_net_init,
+ .exit = ioam6_net_exit,
+};
+
+int __init ioam6_init(void)
+{
+ int err = register_pernet_subsys(&ioam6_net_ops);
+ if (err)
+ goto out;
+
+ err = genl_register_family(&ioam6_genl_family);
+ if (err)
+ goto out_unregister_pernet_subsys;
+
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+ err = ioam6_iptunnel_init();
+ if (err)
+ goto out_unregister_genl;
+#endif
+
+ pr_info("In-situ OAM (IOAM) with IPv6\n");
+
+out:
+ return err;
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+out_unregister_genl:
+ genl_unregister_family(&ioam6_genl_family);
+#endif
+out_unregister_pernet_subsys:
+ unregister_pernet_subsys(&ioam6_net_ops);
+ goto out;
+}
+
+void ioam6_exit(void)
+{
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+ ioam6_iptunnel_exit();
+#endif
+ genl_unregister_family(&ioam6_genl_family);
+ unregister_pernet_subsys(&ioam6_net_ops);
+}
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
new file mode 100644
index 000000000000..f9ee04541c17
--- /dev/null
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IPv6 IOAM Lightweight Tunnel implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/in6.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_iptunnel.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/lwtunnel.h>
+#include <net/ioam6.h>
+
+#define IOAM6_MASK_SHORT_FIELDS 0xff100000
+#define IOAM6_MASK_WIDE_FIELDS 0xe00000
+
+struct ioam6_lwt_encap {
+ struct ipv6_hopopt_hdr eh;
+ u8 pad[2]; /* 2-octet padding for 4n-alignment */
+ struct ioam6_hdr ioamh;
+ struct ioam6_trace_hdr traceh;
+} __packed;
+
+struct ioam6_lwt {
+ struct ioam6_lwt_encap tuninfo;
+};
+
+static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
+{
+ return (struct ioam6_lwt *)lwt->data;
+}
+
+static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
+{
+ return &ioam6_lwt_state(lwt)->tuninfo;
+}
+
+static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt)
+{
+ return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
+}
+
+static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
+ [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
+};
+
+static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype,
+ struct ioam6_trace_hdr *trace)
+{
+ struct ioam6_trace_hdr *data;
+ struct nlattr *nla;
+ int len;
+
+ len = sizeof(*trace);
+
+ nla = nla_reserve(skb, attrtype, len);
+ if (!nla)
+ return -EMSGSIZE;
+
+ data = nla_data(nla);
+ memcpy(data, trace, len);
+
+ return 0;
+}
+
+static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
+{
+ u32 fields;
+
+ if (!trace->type_be32 || !trace->remlen ||
+ trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4)
+ return false;
+
+ trace->nodelen = 0;
+ fields = be32_to_cpu(trace->type_be32);
+
+ trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
+ * (sizeof(__be32) / 4);
+ trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
+ * (sizeof(__be64) / 4);
+
+ return true;
+}
+
+static int ioam6_build_state(struct net *net, struct nlattr *nla,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
+ struct ioam6_lwt_encap *tuninfo;
+ struct ioam6_trace_hdr *trace;
+ struct lwtunnel_state *s;
+ int len_aligned;
+ int len, err;
+
+ if (family != AF_INET6)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
+ ioam6_iptunnel_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[IOAM6_IPTUNNEL_TRACE]) {
+ NL_SET_ERR_MSG(extack, "missing trace");
+ return -EINVAL;
+ }
+
+ trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
+ if (!ioam6_validate_trace_hdr(trace)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
+ "invalid trace validation");
+ return -EINVAL;
+ }
+
+ len = sizeof(*tuninfo) + trace->remlen * 4;
+ len_aligned = ALIGN(len, 8);
+
+ s = lwtunnel_state_alloc(len_aligned);
+ if (!s)
+ return -ENOMEM;
+
+ tuninfo = ioam6_lwt_info(s);
+ tuninfo->eh.hdrlen = (len_aligned >> 3) - 1;
+ tuninfo->pad[0] = IPV6_TLV_PADN;
+ tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
+ tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
+ tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
+ + trace->remlen * 4;
+
+ memcpy(&tuninfo->traceh, trace, sizeof(*trace));
+
+ len = len_aligned - len;
+ if (len == 1) {
+ tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1;
+ } else if (len > 0) {
+ tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
+ tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2;
+ }
+
+ s->type = LWTUNNEL_ENCAP_IOAM6;
+ s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+
+ *ts = s;
+
+ return 0;
+}
+
+static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo)
+{
+ struct ioam6_trace_hdr *trace;
+ struct ipv6hdr *oldhdr, *hdr;
+ struct ioam6_namespace *ns;
+ int hdrlen, err;
+
+ hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
+
+ err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ if (unlikely(err))
+ return err;
+
+ oldhdr = ipv6_hdr(skb);
+ skb_pull(skb, sizeof(*oldhdr));
+ skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));
+
+ skb_push(skb, sizeof(*oldhdr) + hdrlen);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ hdr = ipv6_hdr(skb);
+ memmove(hdr, oldhdr, sizeof(*oldhdr));
+ tuninfo->eh.nexthdr = hdr->nexthdr;
+
+ skb_set_transport_header(skb, sizeof(*hdr));
+ skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);
+
+ memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
+
+ hdr->nexthdr = NEXTHDR_HOP;
+ hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
+
+ trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
+ + sizeof(struct ipv6_hopopt_hdr) + 2
+ + sizeof(struct ioam6_hdr));
+
+ ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id);
+ if (ns)
+ ioam6_fill_trace_data(skb, ns, trace);
+
+ return 0;
+}
+
+static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate;
+ int err = -EINVAL;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ goto drop;
+
+ /* Only for packets we send and
+ * that do not contain a Hop-by-Hop yet
+ */
+ if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
+ goto out;
+
+ err = ioam6_do_inline(skb, ioam6_lwt_info(lwt));
+ if (unlikely(err))
+ goto drop;
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev));
+ if (unlikely(err))
+ goto drop;
+
+out:
+ return lwt->orig_output(net, sk, skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
+}
+
+static int ioam6_fill_encap_info(struct sk_buff *skb,
+ struct lwtunnel_state *lwtstate)
+{
+ struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+ if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+ return nla_total_size(sizeof(*trace));
+}
+
+static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ struct ioam6_trace_hdr *a_hdr = ioam6_trace(a);
+ struct ioam6_trace_hdr *b_hdr = ioam6_trace(b);
+
+ return (a_hdr->namespace_id != b_hdr->namespace_id);
+}
+
+static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
+ .build_state = ioam6_build_state,
+ .output = ioam6_output,
+ .fill_encap = ioam6_fill_encap_info,
+ .get_encap_size = ioam6_encap_nlsize,
+ .cmp_encap = ioam6_encap_cmp,
+ .owner = THIS_MODULE,
+};
+
+int __init ioam6_iptunnel_init(void)
+{
+ return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
+
+void ioam6_iptunnel_exit(void)
+{
+ lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index d7cf26f730d7..d53dd142bf87 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -21,6 +21,7 @@
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+#include <linux/ioam6.h>
static int two = 2;
static int three = 3;
@@ -28,6 +29,8 @@ static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
static u32 rt6_multipath_hash_fields_all_mask =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
+static u32 ioam6_id_max = IOAM6_DEFAULT_ID;
+static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE;
static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -196,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
+ {
+ .procname = "ioam6_id",
+ .data = &init_net.ipv6.sysctl.ioam6_id,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra2 = &ioam6_id_max,
+ },
+ {
+ .procname = "ioam6_id_wide",
+ .data = &init_net.ipv6.sysctl.ioam6_id_wide,
+ .maxlen = sizeof(u64),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra2 = &ioam6_id_wide_max,
+ },
{ }
};