diff options
author | David S. Miller <davem@davemloft.net> | 2020-10-02 23:16:15 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-10-02 23:16:15 +0300 |
commit | c16bcd70a11b52d20877aa4e0b59285690a1b268 (patch) | |
tree | 8d79467b920406544c2f7d415e71c8441d647016 | |
parent | 949ca6b82e43b342dba153a9fd643fb1b5e9f034 (diff) | |
parent | 61e7113e48d3ca1ea692b5c6376a4b545b312166 (diff) | |
download | linux-c16bcd70a11b52d20877aa4e0b59285690a1b268.tar.xz |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next
Steffen Klassert says:
====================
pull request (net-next): ipsec-next 2020-10-02
1) Add a full xfrm compatible layer for 32-bit applications on
64-bit kernels. From Dmitry Safonov.
Please pull or let me know if there are problems.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | include/net/xfrm.h | 33 | ||||
-rw-r--r-- | net/netlink/af_netlink.c | 47 | ||||
-rw-r--r-- | net/xfrm/Kconfig | 11 | ||||
-rw-r--r-- | net/xfrm/Makefile | 1 | ||||
-rw-r--r-- | net/xfrm/xfrm_compat.c | 625 | ||||
-rw-r--r-- | net/xfrm/xfrm_state.c | 77 | ||||
-rw-r--r-- | net/xfrm/xfrm_user.c | 110 | ||||
-rw-r--r-- | tools/testing/selftests/net/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 1 | ||||
-rw-r--r-- | tools/testing/selftests/net/ipsec.c | 2195 |
11 files changed, 3066 insertions, 36 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index c1e946606dce..8f1c9eb6a869 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12158,6 +12158,7 @@ F: net/ipv6/ipcomp6.c F: net/ipv6/xfrm* F: net/key/ F: net/xfrm/ +F: tools/testing/selftests/net/ipsec.c NETWORKING [IPv4/IPv6] M: "David S. Miller" <davem@davemloft.net> diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2737d24ec244..53618a31634b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -2000,6 +2000,39 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x, return 0; } +extern const int xfrm_msg_min[XFRM_NR_MSGTYPES]; +extern const struct nla_policy xfrma_policy[XFRMA_MAX+1]; + +struct xfrm_translator { + /* Allocate frag_list and put compat translation there */ + int (*alloc_compat)(struct sk_buff *skb, const struct nlmsghdr *src); + + /* Allocate nlmsg with 64-bit translaton of received 32-bit message */ + struct nlmsghdr *(*rcv_msg_compat)(const struct nlmsghdr *nlh, + int maxtype, const struct nla_policy *policy, + struct netlink_ext_ack *extack); + + /* Translate 32-bit user_policy from sockptr */ + int (*xlate_user_policy_sockptr)(u8 **pdata32, int optlen); + + struct module *owner; +}; + +#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT) +extern int xfrm_register_translator(struct xfrm_translator *xtr); +extern int xfrm_unregister_translator(struct xfrm_translator *xtr); +extern struct xfrm_translator *xfrm_get_translator(void); +extern void xfrm_put_translator(struct xfrm_translator *xtr); +#else +static inline struct xfrm_translator *xfrm_get_translator(void) +{ + return NULL; +} +static inline void xfrm_put_translator(struct xfrm_translator *xtr) +{ +} +#endif + #if IS_ENABLED(CONFIG_IPV6) static inline bool xfrm6_local_dontfrag(const struct sock *sk) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4ec4c0f72f61..df675a8e1918 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2186,13 +2186,35 @@ EXPORT_SYMBOL(__nlmsg_put); * It would be better to create kernel thread. */ +static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb, + struct netlink_callback *cb, + struct netlink_ext_ack *extack) +{ + struct nlmsghdr *nlh; + + nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno), + NLM_F_MULTI | cb->answer_flags); + if (WARN_ON(!nlh)) + return -ENOBUFS; + + nl_dump_check_consistent(cb, nlh); + memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno)); + + if (extack->_msg && nlk->flags & NETLINK_F_EXT_ACK) { + nlh->nlmsg_flags |= NLM_F_ACK_TLVS; + if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg)) + nlmsg_end(skb, nlh); + } + + return 0; +} + static int netlink_dump(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; - struct nlmsghdr *nlh; struct module *module; int err = -ENOBUFS; int alloc_min_size; @@ -2258,22 +2280,19 @@ static int netlink_dump(struct sock *sk) return 0; } - nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, - sizeof(nlk->dump_done_errno), - NLM_F_MULTI | cb->answer_flags); - if (WARN_ON(!nlh)) + if (netlink_dump_done(nlk, skb, cb, &extack)) goto errout_skb; - nl_dump_check_consistent(cb, nlh); - - memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, - sizeof(nlk->dump_done_errno)); - - if (extack._msg && nlk->flags & NETLINK_F_EXT_ACK) { - nlh->nlmsg_flags |= NLM_F_ACK_TLVS; - if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack._msg)) - nlmsg_end(skb, nlh); +#ifdef CONFIG_COMPAT_NETLINK_MESSAGES + /* frag_list skb's data is used for compat tasks + * and the regular skb's data for normal (non-compat) tasks. + * See netlink_recvmsg(). + */ + if (unlikely(skb_shinfo(skb)->frag_list)) { + if (netlink_dump_done(nlk, skb_shinfo(skb)->frag_list, cb, &extack)) + goto errout_skb; } +#endif if (sk_filter(sk, skb)) kfree_skb(skb); diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 5b9a5ab48111..3adf31a83a79 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -28,6 +28,17 @@ config XFRM_USER If unsure, say Y. +config XFRM_USER_COMPAT + tristate "Compatible ABI support" + depends on XFRM_USER && COMPAT_FOR_U64_ALIGNMENT && \ + HAVE_EFFICIENT_UNALIGNED_ACCESS + select WANT_COMPAT_NETLINK_MESSAGES + help + Transformation(XFRM) user configuration interface like IPsec + used by compatible Linux applications. + + If unsure, say N. + config XFRM_INTERFACE tristate "Transformation virtual interface" depends on XFRM && IPV6 diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 2d4bb4b9f75e..494aa744bfb9 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o +obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c new file mode 100644 index 000000000000..e28f0c9ecd6a --- /dev/null +++ b/net/xfrm/xfrm_compat.c @@ -0,0 +1,625 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * XFRM compat layer + * Author: Dmitry Safonov <dima@arista.com> + * Based on code and translator idea by: Florian Westphal <fw@strlen.de> + */ +#include <linux/compat.h> +#include <linux/xfrm.h> +#include <net/xfrm.h> + +struct compat_xfrm_lifetime_cfg { + compat_u64 soft_byte_limit, hard_byte_limit; + compat_u64 soft_packet_limit, hard_packet_limit; + compat_u64 soft_add_expires_seconds, hard_add_expires_seconds; + compat_u64 soft_use_expires_seconds, hard_use_expires_seconds; +}; /* same size on 32bit, but only 4 byte alignment required */ + +struct compat_xfrm_lifetime_cur { + compat_u64 bytes, packets, add_time, use_time; +}; /* same size on 32bit, but only 4 byte alignment required */ + +struct compat_xfrm_userpolicy_info { + struct xfrm_selector sel; + struct compat_xfrm_lifetime_cfg lft; + struct compat_xfrm_lifetime_cur curlft; + __u32 priority, index; + u8 dir, action, flags, share; + /* 4 bytes additional padding on 64bit */ +}; + +struct compat_xfrm_usersa_info { + struct xfrm_selector sel; + struct xfrm_id id; + xfrm_address_t saddr; + struct compat_xfrm_lifetime_cfg lft; + struct compat_xfrm_lifetime_cur curlft; + struct xfrm_stats stats; + __u32 seq, reqid; + u16 family; + u8 mode, replay_window, flags; + /* 4 bytes additional padding on 64bit */ +}; + +struct compat_xfrm_user_acquire { + struct xfrm_id id; + xfrm_address_t saddr; + struct xfrm_selector sel; + struct compat_xfrm_userpolicy_info policy; + /* 4 bytes additional padding on 64bit */ + __u32 aalgos, ealgos, calgos, seq; +}; + +struct compat_xfrm_userspi_info { + struct compat_xfrm_usersa_info info; + /* 4 bytes additional padding on 64bit */ + __u32 min, max; +}; + +struct compat_xfrm_user_expire { + struct compat_xfrm_usersa_info state; + /* 8 bytes additional padding on 64bit */ + u8 hard; +}; + +struct compat_xfrm_user_polexpire { + struct compat_xfrm_userpolicy_info pol; + /* 8 bytes additional padding on 64bit */ + u8 hard; +}; + +#define XMSGSIZE(type) sizeof(struct type) + +static const int compat_msg_min[XFRM_NR_MSGTYPES] = { + [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_usersa_info), + [XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), + [XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), + [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_userpolicy_info), + [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), + [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), + [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_userspi_info), + [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_user_acquire), + [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_user_expire), + [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_userpolicy_info), + [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_usersa_info), + [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_user_polexpire), + [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), + [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0, + [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), + [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), + [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), + [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), + [XFRM_MSG_NEWSADINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_MAPPING - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_mapping) +}; + +static const struct nla_policy compat_policy[XFRMA_MAX+1] = { + [XFRMA_SA] = { .len = XMSGSIZE(compat_xfrm_usersa_info)}, + [XFRMA_POLICY] = { .len = XMSGSIZE(compat_xfrm_userpolicy_info)}, + [XFRMA_LASTUSED] = { .type = NLA_U64}, + [XFRMA_ALG_AUTH_TRUNC] = { .len = sizeof(struct xfrm_algo_auth)}, + [XFRMA_ALG_AEAD] = { .len = sizeof(struct xfrm_algo_aead) }, + [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) }, + [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) }, + [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) }, + [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) }, + [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) }, + [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 }, + [XFRMA_ETIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SRCADDR] = { .len = sizeof(xfrm_address_t) }, + [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, + [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, + [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, + [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, + [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, + [XFRMA_TFCPAD] = { .type = NLA_U32 }, + [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, + [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, + [XFRMA_PROTO] = { .type = NLA_U8 }, + [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, + [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, + [XFRMA_SET_MARK] = { .type = NLA_U32 }, + [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, + [XFRMA_IF_ID] = { .type = NLA_U32 }, +}; + +static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, + const struct nlmsghdr *nlh_src, u16 type) +{ + int payload = compat_msg_min[type]; + int src_len = xfrm_msg_min[type]; + struct nlmsghdr *nlh_dst; + + /* Compat messages are shorter or equal to native (+padding) */ + if (WARN_ON_ONCE(src_len < payload)) + return ERR_PTR(-EMSGSIZE); + + nlh_dst = nlmsg_put(skb, nlh_src->nlmsg_pid, nlh_src->nlmsg_seq, + nlh_src->nlmsg_type, payload, nlh_src->nlmsg_flags); + if (!nlh_dst) + return ERR_PTR(-EMSGSIZE); + + memset(nlmsg_data(nlh_dst), 0, payload); + + switch (nlh_src->nlmsg_type) { + /* Compat message has the same layout as native */ + case XFRM_MSG_DELSA: + case XFRM_MSG_DELPOLICY: + case XFRM_MSG_FLUSHSA: + case XFRM_MSG_FLUSHPOLICY: + case XFRM_MSG_NEWAE: + case XFRM_MSG_REPORT: + case XFRM_MSG_MIGRATE: + case XFRM_MSG_NEWSADINFO: + case XFRM_MSG_NEWSPDINFO: + case XFRM_MSG_MAPPING: + WARN_ON_ONCE(src_len != payload); + memcpy(nlmsg_data(nlh_dst), nlmsg_data(nlh_src), src_len); + break; + /* 4 byte alignment for trailing u64 on native, but not on compat */ + case XFRM_MSG_NEWSA: + case XFRM_MSG_NEWPOLICY: + case XFRM_MSG_UPDSA: + case XFRM_MSG_UPDPOLICY: + WARN_ON_ONCE(src_len != payload + 4); + memcpy(nlmsg_data(nlh_dst), nlmsg_data(nlh_src), payload); + break; + case XFRM_MSG_EXPIRE: { + const struct xfrm_user_expire *src_ue = nlmsg_data(nlh_src); + struct compat_xfrm_user_expire *dst_ue = nlmsg_data(nlh_dst); + + /* compat_xfrm_user_expire has 4-byte smaller state */ + memcpy(dst_ue, src_ue, sizeof(dst_ue->state)); + dst_ue->hard = src_ue->hard; + break; + } + case XFRM_MSG_ACQUIRE: { + const struct xfrm_user_acquire *src_ua = nlmsg_data(nlh_src); + struct compat_xfrm_user_acquire *dst_ua = nlmsg_data(nlh_dst); + + memcpy(dst_ua, src_ua, offsetof(struct compat_xfrm_user_acquire, aalgos)); + dst_ua->aalgos = src_ua->aalgos; + dst_ua->ealgos = src_ua->ealgos; + dst_ua->calgos = src_ua->calgos; + dst_ua->seq = src_ua->seq; + break; + } + case XFRM_MSG_POLEXPIRE: { + const struct xfrm_user_polexpire *src_upe = nlmsg_data(nlh_src); + struct compat_xfrm_user_polexpire *dst_upe = nlmsg_data(nlh_dst); + + /* compat_xfrm_user_polexpire has 4-byte smaller state */ + memcpy(dst_upe, src_upe, sizeof(dst_upe->pol)); + dst_upe->hard = src_upe->hard; + break; + } + case XFRM_MSG_ALLOCSPI: { + const struct xfrm_userspi_info *src_usi = nlmsg_data(nlh_src); + struct compat_xfrm_userspi_info *dst_usi = nlmsg_data(nlh_dst); + + /* compat_xfrm_user_polexpire has 4-byte smaller state */ + memcpy(dst_usi, src_usi, sizeof(src_usi->info)); + dst_usi->min = src_usi->min; + dst_usi->max = src_usi->max; + break; + } + /* Not being sent by kernel */ + case XFRM_MSG_GETSA: + case XFRM_MSG_GETPOLICY: + case XFRM_MSG_GETAE: + case XFRM_MSG_GETSADINFO: + case XFRM_MSG_GETSPDINFO: + default: + WARN_ONCE(1, "unsupported nlmsg_type %d", nlh_src->nlmsg_type); + return ERR_PTR(-EOPNOTSUPP); + } + + return nlh_dst; +} + +static int xfrm_nla_cpy(struct sk_buff *dst, const struct nlattr *src, int len) +{ + return nla_put(dst, src->nla_type, len, nla_data(src)); +} + +static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) +{ + switch (src->nla_type) { + case XFRMA_PAD: + /* Ignore */ + return 0; + case XFRMA_ALG_AUTH: + case XFRMA_ALG_CRYPT: + case XFRMA_ALG_COMP: + case XFRMA_ENCAP: + case XFRMA_TMPL: + return xfrm_nla_cpy(dst, src, nla_len(src)); + case XFRMA_SA: + return xfrm_nla_cpy(dst, src, XMSGSIZE(compat_xfrm_usersa_info)); + case XFRMA_POLICY: + return xfrm_nla_cpy(dst, src, XMSGSIZE(compat_xfrm_userpolicy_info)); + case XFRMA_SEC_CTX: + return xfrm_nla_cpy(dst, src, nla_len(src)); + case XFRMA_LTIME_VAL: + return nla_put_64bit(dst, src->nla_type, nla_len(src), + nla_data(src), XFRMA_PAD); + case XFRMA_REPLAY_VAL: + case XFRMA_REPLAY_THRESH: + case XFRMA_ETIMER_THRESH: + case XFRMA_SRCADDR: + case XFRMA_COADDR: + return xfrm_nla_cpy(dst, src, nla_len(src)); + case XFRMA_LASTUSED: + return nla_put_64bit(dst, src->nla_type, nla_len(src), + nla_data(src), XFRMA_PAD); + case XFRMA_POLICY_TYPE: + case XFRMA_MIGRATE: + case XFRMA_ALG_AEAD: + case XFRMA_KMADDRESS: + case XFRMA_ALG_AUTH_TRUNC: + case XFRMA_MARK: + case XFRMA_TFCPAD: + case XFRMA_REPLAY_ESN_VAL: + case XFRMA_SA_EXTRA_FLAGS: + case XFRMA_PROTO: + case XFRMA_ADDRESS_FILTER: + case XFRMA_OFFLOAD_DEV: + case XFRMA_SET_MARK: + case XFRMA_SET_MARK_MASK: + case XFRMA_IF_ID: + return xfrm_nla_cpy(dst, src, nla_len(src)); + default: + BUILD_BUG_ON(XFRMA_MAX != XFRMA_IF_ID); + WARN_ONCE(1, "unsupported nla_type %d", src->nla_type); + return -EOPNOTSUPP; + } +} + +/* Take kernel-built (64bit layout) and create 32bit layout for userspace */ +static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src) +{ + u16 type = nlh_src->nlmsg_type - XFRM_MSG_BASE; + const struct nlattr *nla, *attrs; + struct nlmsghdr *nlh_dst; + int len, remaining; + + nlh_dst = xfrm_nlmsg_put_compat(dst, nlh_src, type); + if (IS_ERR(nlh_dst)) + return PTR_ERR(nlh_dst); + + attrs = nlmsg_attrdata(nlh_src, xfrm_msg_min[type]); + len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]); + + nla_for_each_attr(nla, attrs, len, remaining) { + int err = xfrm_xlate64_attr(dst, nla); + + if (err) + return err; + } + + nlmsg_end(dst, nlh_dst); + + return 0; +} + +static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src) +{ + u16 type = nlh_src->nlmsg_type - XFRM_MSG_BASE; + struct sk_buff *new = NULL; + int err; + + if (WARN_ON_ONCE(type >= ARRAY_SIZE(xfrm_msg_min))) + return -EOPNOTSUPP; + + if (skb_shinfo(skb)->frag_list == NULL) { + new = alloc_skb(skb->len + skb_tailroom(skb), GFP_ATOMIC); + if (!new) + return -ENOMEM; + skb_shinfo(skb)->frag_list = new; + } + + err = xfrm_xlate64(skb_shinfo(skb)->frag_list, nlh_src); + if (err) { + if (new) { + kfree_skb(new); + skb_shinfo(skb)->frag_list = NULL; + } + return err; + } + + return 0; +} + +/* Calculates len of translated 64-bit message. */ +static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, + struct nlattr *attrs[XFRMA_MAX+1]) +{ + size_t len = nlmsg_len(src); + + switch (src->nlmsg_type) { + case XFRM_MSG_NEWSA: + case XFRM_MSG_NEWPOLICY: + case XFRM_MSG_ALLOCSPI: + case XFRM_MSG_ACQUIRE: + case XFRM_MSG_UPDPOLICY: + case XFRM_MSG_UPDSA: + len += 4; + break; + case XFRM_MSG_EXPIRE: + case XFRM_MSG_POLEXPIRE: + len += 8; + break; + default: + break; + } + + if (attrs[XFRMA_SA]) + len += 4; + if (attrs[XFRMA_POLICY]) + len += 4; + + /* XXX: some attrs may need to be realigned + * if !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + */ + + return len; +} + +static int xfrm_attr_cpy32(void *dst, size_t *pos, const struct nlattr *src, + size_t size, int copy_len, int payload) +{ + struct nlmsghdr *nlmsg = dst; + struct nlattr *nla; + + if (WARN_ON_ONCE(copy_len > payload)) + copy_len = payload; + + if (size - *pos < nla_attr_size(payload)) + return -ENOBUFS; + + nla = dst + *pos; + + memcpy(nla, src, nla_attr_size(copy_len)); + nla->nla_len = nla_attr_size(payload); + *pos += nla_attr_size(payload); + nlmsg->nlmsg_len += nla->nla_len; + + memset(dst + *pos, 0, payload - copy_len); + *pos += payload - copy_len; + + return 0; +} + +static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, + size_t *pos, size_t size, + struct netlink_ext_ack *extack) +{ + int type = nla_type(nla); + u16 pol_len32, pol_len64; + int err; + + if (type > XFRMA_MAX) { + BUILD_BUG_ON(XFRMA_MAX != XFRMA_IF_ID); + NL_SET_ERR_MSG(extack, "Bad attribute"); + return -EOPNOTSUPP; + } + if (nla_len(nla) < compat_policy[type].len) { + NL_SET_ERR_MSG(extack, "Attribute bad length"); + return -EOPNOTSUPP; + } + + pol_len32 = compat_policy[type].len; + pol_len64 = xfrma_policy[type].len; + + /* XFRMA_SA and XFRMA_POLICY - need to know how-to translate */ + if (pol_len32 != pol_len64) { + if (nla_len(nla) != compat_policy[type].len) { + NL_SET_ERR_MSG(extack, "Attribute bad length"); + return -EOPNOTSUPP; + } + err = xfrm_attr_cpy32(dst, pos, nla, size, pol_len32, pol_len64); + if (err) + return err; + } + + return xfrm_attr_cpy32(dst, pos, nla, size, nla_len(nla), nla_len(nla)); +} + +static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, + struct nlattr *attrs[XFRMA_MAX+1], + size_t size, u8 type, struct netlink_ext_ack *extack) +{ + size_t pos; + int i; + + memcpy(dst, src, NLMSG_HDRLEN); + dst->nlmsg_len = NLMSG_HDRLEN + xfrm_msg_min[type]; + memset(nlmsg_data(dst), 0, xfrm_msg_min[type]); + + switch (src->nlmsg_type) { + /* Compat message has the same layout as native */ + case XFRM_MSG_DELSA: + case XFRM_MSG_GETSA: + case XFRM_MSG_DELPOLICY: + case XFRM_MSG_GETPOLICY: + case XFRM_MSG_FLUSHSA: + case XFRM_MSG_FLUSHPOLICY: + case XFRM_MSG_NEWAE: + case XFRM_MSG_GETAE: + case XFRM_MSG_REPORT: + case XFRM_MSG_MIGRATE: + case XFRM_MSG_NEWSADINFO: + case XFRM_MSG_GETSADINFO: + case XFRM_MSG_NEWSPDINFO: + case XFRM_MSG_GETSPDINFO: + case XFRM_MSG_MAPPING: + memcpy(nlmsg_data(dst), nlmsg_data(src), compat_msg_min[type]); + break; + /* 4 byte alignment for trailing u64 on native, but not on compat */ + case XFRM_MSG_NEWSA: + case XFRM_MSG_NEWPOLICY: + case XFRM_MSG_UPDSA: + case XFRM_MSG_UPDPOLICY: + memcpy(nlmsg_data(dst), nlmsg_data(src), compat_msg_min[type]); + break; + case XFRM_MSG_EXPIRE: { + const struct compat_xfrm_user_expire *src_ue = nlmsg_data(src); + struct xfrm_user_expire *dst_ue = nlmsg_data(dst); + + /* compat_xfrm_user_expire has 4-byte smaller state */ + memcpy(dst_ue, src_ue, sizeof(src_ue->state)); + dst_ue->hard = src_ue->hard; + break; + } + case XFRM_MSG_ACQUIRE: { + const struct compat_xfrm_user_acquire *src_ua = nlmsg_data(src); + struct xfrm_user_acquire *dst_ua = nlmsg_data(dst); + + memcpy(dst_ua, src_ua, offsetof(struct compat_xfrm_user_acquire, aalgos)); + dst_ua->aalgos = src_ua->aalgos; + dst_ua->ealgos = src_ua->ealgos; + dst_ua->calgos = src_ua->calgos; + dst_ua->seq = src_ua->seq; + break; + } + case XFRM_MSG_POLEXPIRE: { + const struct compat_xfrm_user_polexpire *src_upe = nlmsg_data(src); + struct xfrm_user_polexpire *dst_upe = nlmsg_data(dst); + + /* compat_xfrm_user_polexpire has 4-byte smaller state */ + memcpy(dst_upe, src_upe, sizeof(src_upe->pol)); + dst_upe->hard = src_upe->hard; + break; + } + case XFRM_MSG_ALLOCSPI: { + const struct compat_xfrm_userspi_info *src_usi = nlmsg_data(src); + struct xfrm_userspi_info *dst_usi = nlmsg_data(dst); + + /* compat_xfrm_user_polexpire has 4-byte smaller state */ + memcpy(dst_usi, src_usi, sizeof(src_usi->info)); + dst_usi->min = src_usi->min; + dst_usi->max = src_usi->max; + break; + } + default: + NL_SET_ERR_MSG(extack, "Unsupported message type"); + return -EOPNOTSUPP; + } + pos = dst->nlmsg_len; + + for (i = 1; i < XFRMA_MAX + 1; i++) { + int err; + + if (i == XFRMA_PAD) + continue; + + if (!attrs[i]) + continue; + + err = xfrm_xlate32_attr(dst, attrs[i], &pos, size, extack); + if (err) + return err; + } + + return 0; +} + +static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, + int maxtype, const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + /* netlink_rcv_skb() checks if a message has full (struct nlmsghdr) */ + u16 type = h32->nlmsg_type - XFRM_MSG_BASE; + struct nlattr *attrs[XFRMA_MAX+1]; + struct nlmsghdr *h64; + size_t len; + int err; + + BUILD_BUG_ON(ARRAY_SIZE(xfrm_msg_min) != ARRAY_SIZE(compat_msg_min)); + + if (type >= ARRAY_SIZE(xfrm_msg_min)) + return ERR_PTR(-EINVAL); + + /* Don't call parse: the message might have only nlmsg header */ + if ((h32->nlmsg_type == XFRM_MSG_GETSA || + h32->nlmsg_type == XFRM_MSG_GETPOLICY) && + (h32->nlmsg_flags & NLM_F_DUMP)) + return NULL; + + err = nlmsg_parse_deprecated(h32, compat_msg_min[type], attrs, + maxtype ? : XFRMA_MAX, policy ? : compat_policy, extack); + if (err < 0) + return ERR_PTR(err); + + len = xfrm_user_rcv_calculate_len64(h32, attrs); + /* The message doesn't need translation */ + if (len == nlmsg_len(h32)) + return NULL; + + len += NLMSG_HDRLEN; + h64 = kvmalloc(len, GFP_KERNEL | __GFP_ZERO); + if (!h64) + return ERR_PTR(-ENOMEM); + + err = xfrm_xlate32(h64, h32, attrs, len, type, extack); + if (err < 0) { + kvfree(h64); + return ERR_PTR(err); + } + + return h64; +} + +static int xfrm_user_policy_compat(u8 **pdata32, int optlen) +{ + struct compat_xfrm_userpolicy_info *p = (void *)*pdata32; + u8 *src_templates, *dst_templates; + u8 *data64; + + if (optlen < sizeof(*p)) + return -EINVAL; + + data64 = kmalloc_track_caller(optlen + 4, GFP_USER | __GFP_NOWARN); + if (!data64) + return -ENOMEM; + + memcpy(data64, *pdata32, sizeof(*p)); + memset(data64 + sizeof(*p), 0, 4); + + src_templates = *pdata32 + sizeof(*p); + dst_templates = data64 + sizeof(*p) + 4; + memcpy(dst_templates, src_templates, optlen - sizeof(*p)); + + kfree(*pdata32); + *pdata32 = data64; + return 0; +} + +static struct xfrm_translator xfrm_translator = { + .owner = THIS_MODULE, + .alloc_compat = xfrm_alloc_compat, + .rcv_msg_compat = xfrm_user_rcv_msg_compat, + .xlate_user_policy_sockptr = xfrm_user_policy_compat, +}; + +static int __init xfrm_compat_init(void) +{ + return xfrm_register_translator(&xfrm_translator); +} + +static void __exit xfrm_compat_exit(void) +{ + xfrm_unregister_translator(&xfrm_translator); +} + +module_init(xfrm_compat_init); +module_exit(xfrm_compat_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dmitry Safonov"); +MODULE_DESCRIPTION("XFRM 32-bit compatibility layer"); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 69520ad3d83b..f9961884500b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2264,6 +2264,66 @@ static bool km_is_alive(const struct km_event *c) return is_alive; } +#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT) +static DEFINE_SPINLOCK(xfrm_translator_lock); +static struct xfrm_translator __rcu *xfrm_translator; + +struct xfrm_translator *xfrm_get_translator(void) +{ + struct xfrm_translator *xtr; + + rcu_read_lock(); + xtr = rcu_dereference(xfrm_translator); + if (unlikely(!xtr)) + goto out; + if (!try_module_get(xtr->owner)) + xtr = NULL; +out: + rcu_read_unlock(); + return xtr; +} +EXPORT_SYMBOL_GPL(xfrm_get_translator); + +void xfrm_put_translator(struct xfrm_translator *xtr) +{ + module_put(xtr->owner); +} +EXPORT_SYMBOL_GPL(xfrm_put_translator); + +int xfrm_register_translator(struct xfrm_translator *xtr) +{ + int err = 0; + + spin_lock_bh(&xfrm_translator_lock); + if (unlikely(xfrm_translator != NULL)) + err = -EEXIST; + else + rcu_assign_pointer(xfrm_translator, xtr); + spin_unlock_bh(&xfrm_translator_lock); + + return err; +} +EXPORT_SYMBOL_GPL(xfrm_register_translator); + +int xfrm_unregister_translator(struct xfrm_translator *xtr) +{ + int err = 0; + + spin_lock_bh(&xfrm_translator_lock); + if (likely(xfrm_translator != NULL)) { + if (rcu_access_pointer(xfrm_translator) != xtr) + err = -EINVAL; + else + RCU_INIT_POINTER(xfrm_translator, NULL); + } + spin_unlock_bh(&xfrm_translator_lock); + synchronize_rcu(); + + return err; +} +EXPORT_SYMBOL_GPL(xfrm_unregister_translator); +#endif + int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen) { int err; @@ -2271,9 +2331,6 @@ int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen) struct xfrm_mgr *km; struct xfrm_policy *pol = NULL; - if (in_compat_syscall()) - return -EOPNOTSUPP; - if (sockptr_is_null(optval) && !optlen) { xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL); xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL); @@ -2288,6 +2345,20 @@ int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen) if (IS_ERR(data)) return PTR_ERR(data); + if (in_compat_syscall()) { + struct xfrm_translator *xtr = xfrm_get_translator(); + + if (!xtr) + return -EOPNOTSUPP; + + err = xtr->xlate_user_policy_sockptr(&data, optlen); + xfrm_put_translator(xtr); + if (err) { + kfree(data); + return err; + } + } + err = -EINVAL; rcu_read_lock(); list_for_each_entry_rcu(km, &xfrm_km_list, list) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index fbb7d9d06478..d0c32a8fcc4a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -975,6 +975,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct xfrm_dump_info *sp = ptr; struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; + struct xfrm_translator *xtr; struct xfrm_usersa_info *p; struct nlmsghdr *nlh; int err; @@ -992,6 +993,18 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) return err; } nlmsg_end(skb, nlh); + + xtr = xfrm_get_translator(); + if (xtr) { + err = xtr->alloc_compat(skb, nlh); + + xfrm_put_translator(xtr); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + } + return 0; } @@ -1006,7 +1019,6 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb) return 0; } -static const struct nla_policy xfrma_policy[XFRMA_MAX+1]; static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -1083,12 +1095,24 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, u32 pid, unsigned int group) { struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); + struct xfrm_translator *xtr; if (!nlsk) { kfree_skb(skb); return -EPIPE; } + xtr = xfrm_get_translator(); + if (xtr) { + int err = xtr->alloc_compat(skb, nlmsg_hdr(skb)); + + xfrm_put_translator(xtr); + if (err) { + kfree_skb(skb); + return err; + } + } + return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); } @@ -1308,6 +1332,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct xfrm_userspi_info *p; + struct xfrm_translator *xtr; struct sk_buff *resp_skb; xfrm_address_t *daddr; int family; @@ -1358,6 +1383,17 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } + xtr = xfrm_get_translator(); + if (xtr) { + err = xtr->alloc_compat(skb, nlmsg_hdr(skb)); + + xfrm_put_translator(xtr); + if (err) { + kfree_skb(resp_skb); + goto out; + } + } + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: @@ -1764,6 +1800,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct xfrm_userpolicy_info *p; struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; + struct xfrm_translator *xtr; struct nlmsghdr *nlh; int err; @@ -1788,6 +1825,18 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr return err; } nlmsg_end(skb, nlh); + + xtr = xfrm_get_translator(); + if (xtr) { + err = xtr->alloc_compat(skb, nlh); + + xfrm_put_translator(xtr); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + } + return 0; } @@ -2533,7 +2582,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, #define XMSGSIZE(type) sizeof(struct type) -static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { +const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), [XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), @@ -2556,10 +2605,11 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), }; +EXPORT_SYMBOL_GPL(xfrm_msg_min); #undef XMSGSIZE -static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { +const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)}, [XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)}, [XFRMA_LASTUSED] = { .type = NLA_U64}, @@ -2591,6 +2641,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, }; +EXPORT_SYMBOL_GPL(xfrma_policy); static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { [XFRMA_SPD_IPV4_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) }, @@ -2640,11 +2691,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; const struct xfrm_link *link; + struct nlmsghdr *nlh64 = NULL; int type, err; - if (in_compat_syscall()) - return -EOPNOTSUPP; - type = nlh->nlmsg_type; if (type > XFRM_MSG_MAX) return -EINVAL; @@ -2656,32 +2705,55 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (!netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; + if (in_compat_syscall()) { + struct xfrm_translator *xtr = xfrm_get_translator(); + + if (!xtr) + return -EOPNOTSUPP; + + nlh64 = xtr->rcv_msg_compat(nlh, link->nla_max, + link->nla_pol, extack); + xfrm_put_translator(xtr); + if (IS_ERR(nlh64)) + return PTR_ERR(nlh64); + if (nlh64) + nlh = nlh64; + } + if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && (nlh->nlmsg_flags & NLM_F_DUMP)) { - if (link->dump == NULL) - return -EINVAL; + struct netlink_dump_control c = { + .start = link->start, + .dump = link->dump, + .done = link->done, + }; - { - struct netlink_dump_control c = { - .start = link->start, - .dump = link->dump, - .done = link->done, - }; - return netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + if (link->dump == NULL) { + err = -EINVAL; + goto err; } + + err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + goto err; } err = nlmsg_parse_deprecated(nlh, xfrm_msg_min[type], attrs, link->nla_max ? : XFRMA_MAX, link->nla_pol ? : xfrma_policy, extack); if (err < 0) - return err; + goto err; - if (link->doit == NULL) - return -EINVAL; + if (link->doit == NULL) { + err = -EINVAL; + goto err; + } - return link->doit(skb, nlh, attrs); + err = link->doit(skb, nlh, attrs); + +err: + kvfree(nlh64); + return err; } static void xfrm_netlink_rcv(struct sk_buff *skb) diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 742c499328b2..61ae899cfc17 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +ipsec msg_zerocopy socket psock_fanout diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 52923eb08934..4773ce72edbd 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -30,6 +30,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat TEST_GEN_FILES += reuseaddr_ports_exhausted TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp +TEST_GEN_FILES += ipsec TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c new file mode 100644 index 000000000000..17ced7d6ce25 --- /dev/null +++ b/tools/testing/selftests/net/ipsec.c @@ -0,0 +1,2195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ipsec.c - Check xfrm on veth inside a net-ns. + * Copyright (c) 2018 Dmitry Safonov + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <asm/types.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/limits.h> +#include <linux/netlink.h> +#include <linux/random.h> +#include <linux/rtnetlink.h> +#include <linux/veth.h> +#include <linux/xfrm.h> +#include <netinet/in.h> +#include <net/if.h> +#include <sched.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> + +#include "../kselftest.h" + +#define printk(fmt, ...) \ + ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__) + +#define pr_err(fmt, ...) printk(fmt ": %m", ##__VA_ARGS__) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */ +#define MAX_PAYLOAD 2048 +#define XFRM_ALGO_KEY_BUF_SIZE 512 +#define MAX_PROCESSES (1 << 14) /* /16 mask divided by /30 subnets */ +#define INADDR_A ((in_addr_t) 0x0a000000) /* 10.0.0.0 */ +#define INADDR_B ((in_addr_t) 0xc0a80000) /* 192.168.0.0 */ + +/* /30 mask for one veth connection */ +#define PREFIX_LEN 30 +#define child_ip(nr) (4*nr + 1) +#define grchild_ip(nr) (4*nr + 2) + +#define VETH_FMT "ktst-%d" +#define VETH_LEN 12 + +static int nsfd_parent = -1; +static int nsfd_childa = -1; +static int nsfd_childb = -1; +static long page_size; + +/* + * ksft_cnt is static in kselftest, so isn't shared with children. + * We have to send a test result back to parent and count there. + * results_fd is a pipe with test feedback from children. + */ +static int results_fd[2]; + +const unsigned int ping_delay_nsec = 50 * 1000 * 1000; +const unsigned int ping_timeout = 300; +const unsigned int ping_count = 100; +const unsigned int ping_success = 80; + +static void randomize_buffer(void *buf, size_t buflen) +{ + int *p = (int *)buf; + size_t words = buflen / sizeof(int); + size_t leftover = buflen % sizeof(int); + + if (!buflen) + return; + + while (words--) + *p++ = rand(); + + if (leftover) { + int tmp = rand(); + + memcpy(buf + buflen - leftover, &tmp, leftover); + } + + return; +} + +static int unshare_open(void) +{ + const char *netns_path = "/proc/self/ns/net"; + int fd; + + if (unshare(CLONE_NEWNET) != 0) { + pr_err("unshare()"); + return -1; + } + + fd = open(netns_path, O_RDONLY); + if (fd <= 0) { + pr_err("open(%s)", netns_path); + return -1; + } + + return fd; +} + +static int switch_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) { + pr_err("setns()"); + return -1; + } + return 0; +} + +/* + * Running the test inside a new parent net namespace to bother less + * about cleanup on error-path. + */ +static int init_namespaces(void) +{ + nsfd_parent = unshare_open(); + if (nsfd_parent <= 0) + return -1; + + nsfd_childa = unshare_open(); + if (nsfd_childa <= 0) + return -1; + + if (switch_ns(nsfd_parent)) + return -1; + + nsfd_childb = unshare_open(); + if (nsfd_childb <= 0) + return -1; + + if (switch_ns(nsfd_parent)) + return -1; + return 0; +} + +static int netlink_sock(int *sock, uint32_t *seq_nr, int proto) +{ + if (*sock > 0) { + seq_nr++; + return 0; + } + + *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto); + if (*sock <= 0) { + pr_err("socket(AF_NETLINK)"); + return -1; + } + + randomize_buffer(seq_nr, sizeof(*seq_nr)); + + return 0; +} + +static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh) +{ + return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len)); +} + +static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */ + struct rtattr *attr = rtattr_hdr(nh); + size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size); + + if (req_sz < nl_size) { + printk("req buf is too small: %zu < %zu", req_sz, nl_size); + return -1; + } + nh->nlmsg_len = nl_size; + + attr->rta_len = RTA_LENGTH(size); + attr->rta_type = rta_type; + memcpy(RTA_DATA(attr), payload, size); + + return 0; +} + +static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + struct rtattr *ret = rtattr_hdr(nh); + + if (rtattr_pack(nh, req_sz, rta_type, payload, size)) + return 0; + + return ret; +} + +static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type) +{ + return _rtattr_begin(nh, req_sz, rta_type, 0, 0); +} + +static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + char *nlmsg_end = (char *)nh + nh->nlmsg_len; + + attr->rta_len = nlmsg_end - (char *)attr; +} + +static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz, + const char *peer, int ns) +{ + struct ifinfomsg pi; + struct rtattr *peer_attr; + + memset(&pi, 0, sizeof(pi)); + pi.ifi_family = AF_UNSPEC; + pi.ifi_change = 0xFFFFFFFF; + + peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi)); + if (!peer_attr) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer))) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns))) + return -1; + + rtattr_end(nh, peer_attr); + + return 0; +} + +static int netlink_check_answer(int sock) +{ + struct nlmsgerror { + struct nlmsghdr hdr; + int error; + struct nlmsghdr orig_msg; + } answer; + + if (recv(sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)answer.hdr.nlmsg_type); + return -1; + } else if (answer.error) { + printk("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + return answer.error; + } + + return 0; +} + +static int veth_add(int sock, uint32_t seq, const char *peera, int ns_a, + const char *peerb, int ns_b) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + const char veth_type[] = "veth"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, peera, strlen(peera))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (veth_pack_peerb(&req.nh, sizeof(req), peerb, ns_b)) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int ip4_addr_set(int sock, uint32_t seq, const char *intf, + struct in_addr addr, uint8_t prefix) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifaddrmsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifa_family = AF_INET; + req.info.ifa_prefixlen = prefix; + req.info.ifa_index = if_nametoindex(intf); + +#ifdef DEBUG + { + char addr_str[IPV4_STR_SZ] = {}; + + strncpy(addr_str, inet_ntoa(addr), IPV4_STR_SZ - 1); + + printk("ip addr set %s", addr_str); + } +#endif + + if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, sizeof(addr))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFA_ADDRESS, &addr, sizeof(addr))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int link_set_up(int sock, uint32_t seq, const char *intf) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = if_nametoindex(intf); + req.info.ifi_flags = IFF_UP; + req.info.ifi_change = IFF_UP; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int ip4_route_set(int sock, uint32_t seq, const char *intf, + struct in_addr src, struct in_addr dst) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rt; + char attrbuf[MAX_PAYLOAD]; + } req; + unsigned int index = if_nametoindex(intf); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt)); + req.nh.nlmsg_type = RTM_NEWROUTE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE; + req.nh.nlmsg_seq = seq; + req.rt.rtm_family = AF_INET; + req.rt.rtm_dst_len = 32; + req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_protocol = RTPROT_BOOT; + req.rt.rtm_scope = RT_SCOPE_LINK; + req.rt.rtm_type = RTN_UNICAST; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, sizeof(dst))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, sizeof(src))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(sock); +} + +static int tunnel_set_route(int route_sock, uint32_t *route_seq, char *veth, + struct in_addr tunsrc, struct in_addr tundst) +{ + if (ip4_addr_set(route_sock, (*route_seq)++, "lo", + tunsrc, PREFIX_LEN)) { + printk("Failed to set ipv4 addr"); + return -1; + } + + if (ip4_route_set(route_sock, (*route_seq)++, veth, tunsrc, tundst)) { + printk("Failed to set ipv4 route"); + return -1; + } + + return 0; +} + +static int init_child(int nsfd, char *veth, unsigned int src, unsigned int dst) +{ + struct in_addr intsrc = inet_makeaddr(INADDR_B, src); + struct in_addr tunsrc = inet_makeaddr(INADDR_A, src); + struct in_addr tundst = inet_makeaddr(INADDR_A, dst); + int route_sock = -1, ret = -1; + uint32_t route_seq; + + if (switch_ns(nsfd)) + return -1; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) { + printk("Failed to open netlink route socket in child"); + return -1; + } + + if (ip4_addr_set(route_sock, route_seq++, veth, intsrc, PREFIX_LEN)) { + printk("Failed to set ipv4 addr"); + goto err; + } + + if (link_set_up(route_sock, route_seq++, veth)) { + printk("Failed to bring up %s", veth); + goto err; + } + + if (tunnel_set_route(route_sock, &route_seq, veth, tunsrc, tundst)) { + printk("Failed to add tunnel route on %s", veth); + goto err; + } + ret = 0; + +err: + close(route_sock); + return ret; +} + +#define ALGO_LEN 64 +enum desc_type { + CREATE_TUNNEL = 0, + ALLOCATE_SPI, + MONITOR_ACQUIRE, + EXPIRE_STATE, + EXPIRE_POLICY, +}; +const char *desc_name[] = { + "create tunnel", + "alloc spi", + "monitor acquire", + "expire state", + "expire policy" +}; +struct xfrm_desc { + enum desc_type type; + uint8_t proto; + char a_algo[ALGO_LEN]; + char e_algo[ALGO_LEN]; + char c_algo[ALGO_LEN]; + char ae_algo[ALGO_LEN]; + unsigned int icv_len; + /* unsigned key_len; */ +}; + +enum msg_type { + MSG_ACK = 0, + MSG_EXIT, + MSG_PING, + MSG_XFRM_PREPARE, + MSG_XFRM_ADD, + MSG_XFRM_DEL, + MSG_XFRM_CLEANUP, +}; + +struct test_desc { + enum msg_type type; + union { + struct { + in_addr_t reply_ip; + unsigned int port; + } ping; + struct xfrm_desc xfrm_desc; + } body; +}; + +struct test_result { + struct xfrm_desc desc; + unsigned int res; +}; + +static void write_test_result(unsigned int res, struct xfrm_desc *d) +{ + struct test_result tr = {}; + ssize_t ret; + + tr.desc = *d; + tr.res = res; + + ret = write(results_fd[1], &tr, sizeof(tr)); + if (ret != sizeof(tr)) + pr_err("Failed to write the result in pipe %zd", ret); +} + +static void write_msg(int fd, struct test_desc *msg, bool exit_of_fail) +{ + ssize_t bytes = write(fd, msg, sizeof(*msg)); + + /* Make sure that write/read is atomic to a pipe */ + BUILD_BUG_ON(sizeof(struct test_desc) > PIPE_BUF); + + if (bytes < 0) { + pr_err("write()"); + if (exit_of_fail) + exit(KSFT_FAIL); + } + if (bytes != sizeof(*msg)) { + pr_err("sent part of the message %zd/%zu", bytes, sizeof(*msg)); + if (exit_of_fail) + exit(KSFT_FAIL); + } +} + +static void read_msg(int fd, struct test_desc *msg, bool exit_of_fail) +{ + ssize_t bytes = read(fd, msg, sizeof(*msg)); + + if (bytes < 0) { + pr_err("read()"); + if (exit_of_fail) + exit(KSFT_FAIL); + } + if (bytes != sizeof(*msg)) { + pr_err("got incomplete message %zd/%zu", bytes, sizeof(*msg)); + if (exit_of_fail) + exit(KSFT_FAIL); + } +} + +static int udp_ping_init(struct in_addr listen_ip, unsigned int u_timeout, + unsigned int *server_port, int sock[2]) +{ + struct sockaddr_in server; + struct timeval t = { .tv_sec = 0, .tv_usec = u_timeout }; + socklen_t s_len = sizeof(server); + + sock[0] = socket(AF_INET, SOCK_DGRAM, 0); + if (sock[0] < 0) { + pr_err("socket()"); + return -1; + } + + server.sin_family = AF_INET; + server.sin_port = 0; + memcpy(&server.sin_addr.s_addr, &listen_ip, sizeof(struct in_addr)); + + if (bind(sock[0], (struct sockaddr *)&server, s_len)) { + pr_err("bind()"); + goto err_close_server; + } + + if (getsockname(sock[0], (struct sockaddr *)&server, &s_len)) { + pr_err("getsockname()"); + goto err_close_server; + } + + *server_port = ntohs(server.sin_port); + + if (setsockopt(sock[0], SOL_SOCKET, SO_RCVTIMEO, (const char *)&t, sizeof t)) { + pr_err("setsockopt()"); + goto err_close_server; + } + + sock[1] = socket(AF_INET, SOCK_DGRAM, 0); + if (sock[1] < 0) { + pr_err("socket()"); + goto err_close_server; + } + + return 0; + +err_close_server: + close(sock[0]); + return -1; +} + +static int udp_ping_send(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len) +{ + struct sockaddr_in server; + const struct sockaddr *dest_addr = (struct sockaddr *)&server; + char *sock_buf[buf_len]; + ssize_t r_bytes, s_bytes; + + server.sin_family = AF_INET; + server.sin_port = htons(port); + server.sin_addr.s_addr = dest_ip; + + s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server)); + if (s_bytes < 0) { + pr_err("sendto()"); + return -1; + } else if (s_bytes != buf_len) { + printk("send part of the message: %zd/%zu", s_bytes, sizeof(server)); + return -1; + } + + r_bytes = recv(sock[0], sock_buf, buf_len, 0); + if (r_bytes < 0) { + if (errno != EAGAIN) + pr_err("recv()"); + return -1; + } else if (r_bytes == 0) { /* EOF */ + printk("EOF on reply to ping"); + return -1; + } else if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) { + printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len); + return -1; + } + + return 0; +} + +static int udp_ping_reply(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len) +{ + struct sockaddr_in server; + const struct sockaddr *dest_addr = (struct sockaddr *)&server; + char *sock_buf[buf_len]; + ssize_t r_bytes, s_bytes; + + server.sin_family = AF_INET; + server.sin_port = htons(port); + server.sin_addr.s_addr = dest_ip; + + r_bytes = recv(sock[0], sock_buf, buf_len, 0); + if (r_bytes < 0) { + if (errno != EAGAIN) + pr_err("recv()"); + return -1; + } + if (r_bytes == 0) { /* EOF */ + printk("EOF on reply to ping"); + return -1; + } + if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) { + printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len); + return -1; + } + + s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server)); + if (s_bytes < 0) { + pr_err("sendto()"); + return -1; + } else if (s_bytes != buf_len) { + printk("send part of the message: %zd/%zu", s_bytes, sizeof(server)); + return -1; + } + + return 0; +} + +typedef int (*ping_f)(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len); +static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from, + bool init_side, int d_port, in_addr_t to, ping_f func) +{ + struct test_desc msg; + unsigned int s_port, i, ping_succeeded = 0; + int ping_sock[2]; + char to_str[IPV4_STR_SZ] = {}, from_str[IPV4_STR_SZ] = {}; + + if (udp_ping_init(from, ping_timeout, &s_port, ping_sock)) { + printk("Failed to init ping"); + return -1; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_PING; + msg.body.ping.port = s_port; + memcpy(&msg.body.ping.reply_ip, &from, sizeof(from)); + + write_msg(cmd_fd, &msg, 0); + if (init_side) { + /* The other end sends ip to ping */ + read_msg(cmd_fd, &msg, 0); + if (msg.type != MSG_PING) + return -1; + to = msg.body.ping.reply_ip; + d_port = msg.body.ping.port; + } + + for (i = 0; i < ping_count ; i++) { + struct timespec sleep_time = { + .tv_sec = 0, + .tv_nsec = ping_delay_nsec, + }; + + ping_succeeded += !func(ping_sock, to, d_port, buf, page_size); + nanosleep(&sleep_time, 0); + } + + close(ping_sock[0]); + close(ping_sock[1]); + + strncpy(to_str, inet_ntoa(*(struct in_addr *)&to), IPV4_STR_SZ - 1); + strncpy(from_str, inet_ntoa(from), IPV4_STR_SZ - 1); + + if (ping_succeeded < ping_success) { + printk("ping (%s) %s->%s failed %u/%u times", + init_side ? "send" : "reply", from_str, to_str, + ping_count - ping_succeeded, ping_count); + return -1; + } + +#ifdef DEBUG + printk("ping (%s) %s->%s succeeded %u/%u times", + init_side ? "send" : "reply", from_str, to_str, + ping_succeeded, ping_count); +#endif + + return 0; +} + +static int xfrm_fill_key(char *name, char *buf, + size_t buf_len, unsigned int *key_len) +{ + /* TODO: use set/map instead */ + if (strncmp(name, "digest_null", ALGO_LEN) == 0) + *key_len = 0; + else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0) + *key_len = 0; + else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0) + *key_len = 64; + else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0) + *key_len = 192; + else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0) + *key_len = 384; + else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0) + *key_len = 448; + else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0) + *key_len = 512; + else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0) + *key_len = 152; + else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0) + *key_len = 224; + else if (strncmp(name, "rfc4543(gcm(aes))-192", ALGO_LEN) == 0) + *key_len = 224; + else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0) + *key_len = 216; + else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0) + *key_len = 280; + else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0) + *key_len = 0; + + if (*key_len > buf_len) { + printk("Can't pack a key - too big for buffer"); + return -1; + } + + randomize_buffer(buf, *key_len); + + return 0; +} + +static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz, + struct xfrm_desc *desc) +{ + struct { + union { + struct xfrm_algo alg; + struct xfrm_algo_aead aead; + struct xfrm_algo_auth auth; + } u; + char buf[XFRM_ALGO_KEY_BUF_SIZE]; + } alg = {}; + size_t alen, elen, clen, aelen; + unsigned short type; + + alen = strlen(desc->a_algo); + elen = strlen(desc->e_algo); + clen = strlen(desc->c_algo); + aelen = strlen(desc->ae_algo); + + /* Verify desc */ + switch (desc->proto) { + case IPPROTO_AH: + if (!alen || elen || clen || aelen) { + printk("BUG: buggy ah desc"); + return -1; + } + strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + type = XFRMA_ALG_AUTH; + break; + case IPPROTO_COMP: + if (!clen || elen || alen || aelen) { + printk("BUG: buggy comp desc"); + return -1; + } + strncpy(alg.u.alg.alg_name, desc->c_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->c_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + type = XFRMA_ALG_COMP; + break; + case IPPROTO_ESP: + if (!((alen && elen) ^ aelen) || clen) { + printk("BUG: buggy esp desc"); + return -1; + } + if (aelen) { + alg.u.aead.alg_icv_len = desc->icv_len; + strncpy(alg.u.aead.alg_name, desc->ae_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->ae_algo, alg.u.aead.alg_key, + sizeof(alg.buf), &alg.u.aead.alg_key_len)) + return -1; + type = XFRMA_ALG_AEAD; + } else { + + strncpy(alg.u.alg.alg_name, desc->e_algo, ALGO_LEN - 1); + type = XFRMA_ALG_CRYPT; + if (xfrm_fill_key(desc->e_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg))) + return -1; + + strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN); + type = XFRMA_ALG_AUTH; + if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + } + break; + default: + printk("BUG: unknown proto in desc"); + return -1; + } + + if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg))) + return -1; + + return 0; +} + +static inline uint32_t gen_spi(struct in_addr src) +{ + return htonl(inet_lnaof(src)); +} + +static int xfrm_state_add(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + struct xfrm_usersa_info info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = XFRM_MSG_NEWSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill selector. */ + memcpy(&req.info.sel.daddr, &dst, sizeof(dst)); + memcpy(&req.info.sel.saddr, &src, sizeof(src)); + req.info.sel.family = AF_INET; + req.info.sel.prefixlen_d = PREFIX_LEN; + req.info.sel.prefixlen_s = PREFIX_LEN; + + /* Fill id */ + memcpy(&req.info.id.daddr, &dst, sizeof(dst)); + /* Note: zero-spi cannot be deleted */ + req.info.id.spi = spi; + req.info.id.proto = desc->proto; + + memcpy(&req.info.saddr, &src, sizeof(src)); + + /* Fill lifteme_cfg */ + req.info.lft.soft_byte_limit = XFRM_INF; + req.info.lft.hard_byte_limit = XFRM_INF; + req.info.lft.soft_packet_limit = XFRM_INF; + req.info.lft.hard_packet_limit = XFRM_INF; + + req.info.family = AF_INET; + req.info.mode = XFRM_MODE_TUNNEL; + + if (xfrm_state_pack_algo(&req.nh, sizeof(req), desc)) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static bool xfrm_usersa_found(struct xfrm_usersa_info *info, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + if (memcmp(&info->sel.daddr, &dst, sizeof(dst))) + return false; + + if (memcmp(&info->sel.saddr, &src, sizeof(src))) + return false; + + if (info->sel.family != AF_INET || + info->sel.prefixlen_d != PREFIX_LEN || + info->sel.prefixlen_s != PREFIX_LEN) + return false; + + if (info->id.spi != spi || info->id.proto != desc->proto) + return false; + + if (memcmp(&info->id.daddr, &dst, sizeof(dst))) + return false; + + if (memcmp(&info->saddr, &src, sizeof(src))) + return false; + + if (info->lft.soft_byte_limit != XFRM_INF || + info->lft.hard_byte_limit != XFRM_INF || + info->lft.soft_packet_limit != XFRM_INF || + info->lft.hard_packet_limit != XFRM_INF) + return false; + + if (info->family != AF_INET || info->mode != XFRM_MODE_TUNNEL) + return false; + + /* XXX: check xfrm algo, see xfrm_state_pack_algo(). */ + + return true; +} + +static int xfrm_state_check(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + char attrbuf[MAX_PAYLOAD]; + } req; + struct { + struct nlmsghdr nh; + union { + struct xfrm_usersa_info info; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } answer; + struct xfrm_address_filter filter = {}; + bool found = false; + + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(0); + req.nh.nlmsg_type = XFRM_MSG_GETSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nh.nlmsg_seq = seq; + + /* + * Add dump filter by source address as there may be other tunnels + * in this netns (if tests run in parallel). + */ + filter.family = AF_INET; + filter.splen = 0x1f; /* 0xffffffff mask see addr_match() */ + memcpy(&filter.saddr, &src, sizeof(src)); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_ADDRESS_FILTER, + &filter, sizeof(filter))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + while (1) { + if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return -1; + } + if (answer.nh.nlmsg_type == NLMSG_ERROR) { + printk("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + return -1; + } else if (answer.nh.nlmsg_type == NLMSG_DONE) { + if (found) + return 0; + printk("didn't find allocated xfrm state in dump"); + return -1; + } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) { + if (xfrm_usersa_found(&answer.info, spi, src, dst, desc)) + found = true; + } + } +} + +static int xfrm_set(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, + struct xfrm_desc *desc) +{ + int err; + + err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc); + if (err) { + printk("Failed to add xfrm state"); + return -1; + } + + err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc); + if (err) { + printk("Failed to add xfrm state"); + return -1; + } + + /* Check dumps for XFRM_MSG_GETSA */ + err = xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc); + err |= xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc); + if (err) { + printk("Failed to check xfrm state"); + return -1; + } + + return 0; +} + +static int xfrm_policy_add(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, uint8_t dir, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userpolicy_info info; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrm_user_tmpl tmpl; + + memset(&req, 0, sizeof(req)); + memset(&tmpl, 0, sizeof(tmpl)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = XFRM_MSG_NEWPOLICY; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill selector. */ + memcpy(&req.info.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.info.sel.saddr, &src, sizeof(tunsrc)); + req.info.sel.family = AF_INET; + req.info.sel.prefixlen_d = PREFIX_LEN; + req.info.sel.prefixlen_s = PREFIX_LEN; + + /* Fill lifteme_cfg */ + req.info.lft.soft_byte_limit = XFRM_INF; + req.info.lft.hard_byte_limit = XFRM_INF; + req.info.lft.soft_packet_limit = XFRM_INF; + req.info.lft.hard_packet_limit = XFRM_INF; + + req.info.dir = dir; + + /* Fill tmpl */ + memcpy(&tmpl.id.daddr, &dst, sizeof(dst)); + /* Note: zero-spi cannot be deleted */ + tmpl.id.spi = spi; + tmpl.id.proto = proto; + tmpl.family = AF_INET; + memcpy(&tmpl.saddr, &src, sizeof(src)); + tmpl.mode = XFRM_MODE_TUNNEL; + tmpl.aalgos = (~(uint32_t)0); + tmpl.ealgos = (~(uint32_t)0); + tmpl.calgos = (~(uint32_t)0); + + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &tmpl, sizeof(tmpl))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_prepare(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, + XFRM_POLICY_OUT, tunsrc, tundst, proto)) { + printk("Failed to add xfrm policy"); + return -1; + } + + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, + XFRM_POLICY_IN, tunsrc, tundst, proto)) { + printk("Failed to add xfrm policy"); + return -1; + } + + return 0; +} + +static int xfrm_policy_del(int xfrm_sock, uint32_t seq, + struct in_addr src, struct in_addr dst, uint8_t dir, + struct in_addr tunsrc, struct in_addr tundst) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userpolicy_id id; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id)); + req.nh.nlmsg_type = XFRM_MSG_DELPOLICY; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill id */ + memcpy(&req.id.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.id.sel.saddr, &src, sizeof(tunsrc)); + req.id.sel.family = AF_INET; + req.id.sel.prefixlen_d = PREFIX_LEN; + req.id.sel.prefixlen_s = PREFIX_LEN; + req.id.dir = dir; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_cleanup(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst) +{ + if (xfrm_policy_del(xfrm_sock, (*seq)++, src, dst, + XFRM_POLICY_OUT, tunsrc, tundst)) { + printk("Failed to add xfrm policy"); + return -1; + } + + if (xfrm_policy_del(xfrm_sock, (*seq)++, dst, src, + XFRM_POLICY_IN, tunsrc, tundst)) { + printk("Failed to add xfrm policy"); + return -1; + } + + return 0; +} + +static int xfrm_state_del(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_usersa_id id; + char attrbuf[MAX_PAYLOAD]; + } req; + xfrm_address_t saddr = {}; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id)); + req.nh.nlmsg_type = XFRM_MSG_DELSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + memcpy(&req.id.daddr, &dst, sizeof(dst)); + req.id.family = AF_INET; + req.id.proto = proto; + /* Note: zero-spi cannot be deleted */ + req.id.spi = spi; + + memcpy(&saddr, &src, sizeof(src)); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SRCADDR, &saddr, sizeof(saddr))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_delete(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), src, dst, proto)) { + printk("Failed to remove xfrm state"); + return -1; + } + + if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), dst, src, proto)) { + printk("Failed to remove xfrm state"); + return -1; + } + + return 0; +} + +static int xfrm_state_allocspi(int xfrm_sock, uint32_t *seq, + uint32_t spi, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userspi_info spi; + } req; + struct { + struct nlmsghdr nh; + union { + struct xfrm_usersa_info info; + int error; + }; + } answer; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.spi)); + req.nh.nlmsg_type = XFRM_MSG_ALLOCSPI; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + + req.spi.info.family = AF_INET; + req.spi.min = spi; + req.spi.max = spi; + req.spi.info.id.proto = proto; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) { + uint32_t new_spi = htonl(answer.info.id.spi); + + if (new_spi != spi) { + printk("allocated spi is different from requested: %#x != %#x", + new_spi, spi); + return KSFT_FAIL; + } + return KSFT_PASS; + } else if (answer.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)answer.nh.nlmsg_type); + return KSFT_FAIL; + } + + printk("NLMSG_ERROR: %d: %s", answer.error, strerror(-answer.error)); + return (answer.error) ? KSFT_FAIL : KSFT_PASS; +} + +static int netlink_sock_bind(int *sock, uint32_t *seq, int proto, uint32_t groups) +{ + struct sockaddr_nl snl = {}; + socklen_t addr_len; + int ret = -1; + + snl.nl_family = AF_NETLINK; + snl.nl_groups = groups; + + if (netlink_sock(sock, seq, proto)) { + printk("Failed to open xfrm netlink socket"); + return -1; + } + + if (bind(*sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) { + pr_err("bind()"); + goto out_close; + } + + addr_len = sizeof(snl); + if (getsockname(*sock, (struct sockaddr *)&snl, &addr_len) < 0) { + pr_err("getsockname()"); + goto out_close; + } + if (addr_len != sizeof(snl)) { + printk("Wrong address length %d", addr_len); + goto out_close; + } + if (snl.nl_family != AF_NETLINK) { + printk("Wrong address family %d", snl.nl_family); + goto out_close; + } + return 0; + +out_close: + close(*sock); + return ret; +} + +static int xfrm_monitor_acquire(int xfrm_sock, uint32_t *seq, unsigned int nr) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_acquire acq; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrm_user_tmpl xfrm_tmpl = {}; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_ACQUIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.acq)); + req.nh.nlmsg_type = XFRM_MSG_ACQUIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + req.acq.policy.sel.family = AF_INET; + req.acq.aalgos = 0xfeed; + req.acq.ealgos = 0xbaad; + req.acq.calgos = 0xbabe; + + xfrm_tmpl.family = AF_INET; + xfrm_tmpl.id.proto = IPPROTO_ESP; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &xfrm_tmpl, sizeof(xfrm_tmpl))) + goto out_close; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.acq.aalgos != 0xfeed || req.acq.ealgos != 0xbaad + || req.acq.calgos != 0xbabe) { + printk("xfrm_user_acquire has changed %x %x %x", + req.acq.aalgos, req.acq.ealgos, req.acq.calgos); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int xfrm_expire_state(int xfrm_sock, uint32_t *seq, + unsigned int nr, struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_expire expire; + int error; + }; + } req; + struct in_addr src, dst; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + + if (xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc)) { + printk("Failed to add xfrm state"); + return KSFT_FAIL; + } + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire)); + req.nh.nlmsg_type = XFRM_MSG_EXPIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + memcpy(&req.expire.state.id.daddr, &dst, sizeof(dst)); + req.expire.state.id.spi = gen_spi(src); + req.expire.state.id.proto = desc->proto; + req.expire.state.family = AF_INET; + req.expire.hard = 0xff; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.expire.hard != 0x1) { + printk("expire.hard is not set: %x", req.expire.hard); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int xfrm_expire_policy(int xfrm_sock, uint32_t *seq, + unsigned int nr, struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_polexpire expire; + int error; + }; + } req; + struct in_addr src, dst, tunsrc, tundst; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, child_ip(nr)); + tundst = inet_makeaddr(INADDR_A, grchild_ip(nr)); + + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, + XFRM_POLICY_OUT, tunsrc, tundst, desc->proto)) { + printk("Failed to add xfrm policy"); + return KSFT_FAIL; + } + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire)); + req.nh.nlmsg_type = XFRM_MSG_POLEXPIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + /* Fill selector. */ + memcpy(&req.expire.pol.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.expire.pol.sel.saddr, &src, sizeof(tunsrc)); + req.expire.pol.sel.family = AF_INET; + req.expire.pol.sel.prefixlen_d = PREFIX_LEN; + req.expire.pol.sel.prefixlen_s = PREFIX_LEN; + req.expire.pol.dir = XFRM_POLICY_OUT; + req.expire.hard = 0xff; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.expire.hard != 0x1) { + printk("expire.hard is not set: %x", req.expire.hard); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int child_serv(int xfrm_sock, uint32_t *seq, + unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) +{ + struct in_addr src, dst, tunsrc, tundst; + struct test_desc msg; + int ret = KSFT_FAIL; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, child_ip(nr)); + tundst = inet_makeaddr(INADDR_A, grchild_ip(nr)); + + /* UDP pinging without xfrm */ + if (do_ping(cmd_fd, buf, page_size, src, true, 0, 0, udp_ping_send)) { + printk("ping failed before setting xfrm"); + return KSFT_FAIL; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_PREPARE; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + + if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) { + printk("failed to prepare xfrm"); + goto cleanup; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_ADD; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) { + printk("failed to set xfrm"); + goto delete; + } + + /* UDP pinging with xfrm tunnel */ + if (do_ping(cmd_fd, buf, page_size, tunsrc, + true, 0, 0, udp_ping_send)) { + printk("ping failed for xfrm"); + goto delete; + } + + ret = KSFT_PASS; +delete: + /* xfrm delete */ + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_DEL; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + + if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) { + printk("failed ping to remove xfrm"); + ret = KSFT_FAIL; + } + +cleanup: + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_CLEANUP; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) { + printk("failed ping to cleanup xfrm"); + ret = KSFT_FAIL; + } + return ret; +} + +static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) +{ + struct xfrm_desc desc; + struct test_desc msg; + int xfrm_sock = -1; + uint32_t seq; + + if (switch_ns(nsfd_childa)) + exit(KSFT_FAIL); + + if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) { + printk("Failed to open xfrm netlink socket"); + exit(KSFT_FAIL); + } + + /* Check that seq sock is ready, just for sure. */ + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_ACK; + write_msg(cmd_fd, &msg, 1); + read_msg(cmd_fd, &msg, 1); + if (msg.type != MSG_ACK) { + printk("Ack failed"); + exit(KSFT_FAIL); + } + + for (;;) { + ssize_t received = read(test_desc_fd, &desc, sizeof(desc)); + int ret; + + if (received == 0) /* EOF */ + break; + + if (received != sizeof(desc)) { + pr_err("read() returned %zd", received); + exit(KSFT_FAIL); + } + + switch (desc.type) { + case CREATE_TUNNEL: + ret = child_serv(xfrm_sock, &seq, nr, + cmd_fd, buf, &desc); + break; + case ALLOCATE_SPI: + ret = xfrm_state_allocspi(xfrm_sock, &seq, + -1, desc.proto); + break; + case MONITOR_ACQUIRE: + ret = xfrm_monitor_acquire(xfrm_sock, &seq, nr); + break; + case EXPIRE_STATE: + ret = xfrm_expire_state(xfrm_sock, &seq, nr, &desc); + break; + case EXPIRE_POLICY: + ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); + break; + default: + printk("Unknown desc type %d", desc.type); + exit(KSFT_FAIL); + } + write_test_result(ret, &desc); + } + + close(xfrm_sock); + + msg.type = MSG_EXIT; + write_msg(cmd_fd, &msg, 1); + exit(KSFT_PASS); +} + +static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf, + struct test_desc *msg, int xfrm_sock, uint32_t *seq) +{ + struct in_addr src, dst, tunsrc, tundst; + bool tun_reply; + struct xfrm_desc *desc = &msg->body.xfrm_desc; + + src = inet_makeaddr(INADDR_B, grchild_ip(nr)); + dst = inet_makeaddr(INADDR_B, child_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, grchild_ip(nr)); + tundst = inet_makeaddr(INADDR_A, child_ip(nr)); + + switch (msg->type) { + case MSG_EXIT: + exit(KSFT_PASS); + case MSG_ACK: + write_msg(cmd_fd, msg, 1); + break; + case MSG_PING: + tun_reply = memcmp(&dst, &msg->body.ping.reply_ip, sizeof(in_addr_t)); + /* UDP pinging without xfrm */ + if (do_ping(cmd_fd, buf, page_size, tun_reply ? tunsrc : src, + false, msg->body.ping.port, + msg->body.ping.reply_ip, udp_ping_reply)) { + printk("ping failed before setting xfrm"); + } + break; + case MSG_XFRM_PREPARE: + if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, + desc->proto)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to prepare xfrm"); + } + break; + case MSG_XFRM_ADD: + if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to set xfrm"); + } + break; + case MSG_XFRM_DEL: + if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, + desc->proto)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to remove xfrm"); + } + break; + case MSG_XFRM_CLEANUP: + if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) { + printk("failed to cleanup xfrm"); + } + break; + default: + printk("got unknown msg type %d", msg->type); + }; +} + +static int grand_child_f(unsigned int nr, int cmd_fd, void *buf) +{ + struct test_desc msg; + int xfrm_sock = -1; + uint32_t seq; + + if (switch_ns(nsfd_childb)) + exit(KSFT_FAIL); + + if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) { + printk("Failed to open xfrm netlink socket"); + exit(KSFT_FAIL); + } + + do { + read_msg(cmd_fd, &msg, 1); + grand_child_serv(nr, cmd_fd, buf, &msg, xfrm_sock, &seq); + } while (1); + + close(xfrm_sock); + exit(KSFT_FAIL); +} + +static int start_child(unsigned int nr, char *veth, int test_desc_fd[2]) +{ + int cmd_sock[2]; + void *data_map; + pid_t child; + + if (init_child(nsfd_childa, veth, child_ip(nr), grchild_ip(nr))) + return -1; + + if (init_child(nsfd_childb, veth, grchild_ip(nr), child_ip(nr))) + return -1; + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } else if (child) { + /* in parent - selftest */ + return switch_ns(nsfd_parent); + } + + if (close(test_desc_fd[1])) { + pr_err("close()"); + return -1; + } + + /* child */ + data_map = mmap(0, page_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (data_map == MAP_FAILED) { + pr_err("mmap()"); + return -1; + } + + randomize_buffer(data_map, page_size); + + if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, cmd_sock)) { + pr_err("socketpair()"); + return -1; + } + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } else if (child) { + if (close(cmd_sock[0])) { + pr_err("close()"); + return -1; + } + return child_f(nr, test_desc_fd[0], cmd_sock[1], data_map); + } + if (close(cmd_sock[1])) { + pr_err("close()"); + return -1; + } + return grand_child_f(nr, cmd_sock[0], data_map); +} + +static void exit_usage(char **argv) +{ + printk("Usage: %s [nr_process]", argv[0]); + exit(KSFT_FAIL); +} + +static int __write_desc(int test_desc_fd, struct xfrm_desc *desc) +{ + ssize_t ret; + + ret = write(test_desc_fd, desc, sizeof(*desc)); + + if (ret == sizeof(*desc)) + return 0; + + pr_err("Writing test's desc failed %ld", ret); + + return -1; +} + +static int write_desc(int proto, int test_desc_fd, + char *a, char *e, char *c, char *ae) +{ + struct xfrm_desc desc = {}; + + desc.type = CREATE_TUNNEL; + desc.proto = proto; + + if (a) + strncpy(desc.a_algo, a, ALGO_LEN - 1); + if (e) + strncpy(desc.e_algo, e, ALGO_LEN - 1); + if (c) + strncpy(desc.c_algo, c, ALGO_LEN - 1); + if (ae) + strncpy(desc.ae_algo, ae, ALGO_LEN - 1); + + return __write_desc(test_desc_fd, &desc); +} + +int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP }; +char *ah_list[] = { + "digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)", + "hmac(sha384)", "hmac(sha512)", "hmac(rmd160)", + "xcbc(aes)", "cmac(aes)" +}; +char *comp_list[] = { + "deflate", +#if 0 + /* No compression backend realization */ + "lzs", "lzjh" +#endif +}; +char *e_list[] = { + "ecb(cipher_null)", "cbc(des)", "cbc(des3_ede)", "cbc(cast5)", + "cbc(blowfish)", "cbc(aes)", "cbc(serpent)", "cbc(camellia)", + "cbc(twofish)", "rfc3686(ctr(aes))" +}; +char *ae_list[] = { +#if 0 + /* not implemented */ + "rfc4106(gcm(aes))", "rfc4309(ccm(aes))", "rfc4543(gcm(aes))", + "rfc7539esp(chacha20,poly1305)" +#endif +}; + +const unsigned int proto_plan = ARRAY_SIZE(ah_list) + ARRAY_SIZE(comp_list) \ + + (ARRAY_SIZE(ah_list) * ARRAY_SIZE(e_list)) \ + + ARRAY_SIZE(ae_list); + +static int write_proto_plan(int fd, int proto) +{ + unsigned int i; + + switch (proto) { + case IPPROTO_AH: + for (i = 0; i < ARRAY_SIZE(ah_list); i++) { + if (write_desc(proto, fd, ah_list[i], 0, 0, 0)) + return -1; + } + break; + case IPPROTO_COMP: + for (i = 0; i < ARRAY_SIZE(comp_list); i++) { + if (write_desc(proto, fd, 0, 0, comp_list[i], 0)) + return -1; + } + break; + case IPPROTO_ESP: + for (i = 0; i < ARRAY_SIZE(ah_list); i++) { + int j; + + for (j = 0; j < ARRAY_SIZE(e_list); j++) { + if (write_desc(proto, fd, ah_list[i], + e_list[j], 0, 0)) + return -1; + } + } + for (i = 0; i < ARRAY_SIZE(ae_list); i++) { + if (write_desc(proto, fd, 0, 0, 0, ae_list[i])) + return -1; + } + break; + default: + printk("BUG: Specified unknown proto %d", proto); + return -1; + } + + return 0; +} + +/* + * Some structures in xfrm uapi header differ in size between + * 64-bit and 32-bit ABI: + * + * 32-bit UABI | 64-bit UABI + * -------------------------------------|------------------------------------- + * sizeof(xfrm_usersa_info) = 220 | sizeof(xfrm_usersa_info) = 224 + * sizeof(xfrm_userpolicy_info) = 164 | sizeof(xfrm_userpolicy_info) = 168 + * sizeof(xfrm_userspi_info) = 228 | sizeof(xfrm_userspi_info) = 232 + * sizeof(xfrm_user_acquire) = 276 | sizeof(xfrm_user_acquire) = 280 + * sizeof(xfrm_user_expire) = 224 | sizeof(xfrm_user_expire) = 232 + * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 + * + * Check the affected by the UABI difference structures. + */ +const unsigned int compat_plan = 4; +static int write_compat_struct_tests(int test_desc_fd) +{ + struct xfrm_desc desc = {}; + + desc.type = ALLOCATE_SPI; + desc.proto = IPPROTO_AH; + strncpy(desc.a_algo, ah_list[0], ALGO_LEN - 1); + + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = MONITOR_ACQUIRE; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = EXPIRE_STATE; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = EXPIRE_POLICY; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + return 0; +} + +static int write_test_plan(int test_desc_fd) +{ + unsigned int i; + pid_t child; + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } + if (child) { + if (close(test_desc_fd)) + printk("close(): %m"); + return 0; + } + + if (write_compat_struct_tests(test_desc_fd)) + exit(KSFT_FAIL); + + for (i = 0; i < ARRAY_SIZE(proto_list); i++) { + if (write_proto_plan(test_desc_fd, proto_list[i])) + exit(KSFT_FAIL); + } + + exit(KSFT_PASS); +} + +static int children_cleanup(void) +{ + unsigned ret = KSFT_PASS; + + while (1) { + int status; + pid_t p = wait(&status); + + if ((p < 0) && errno == ECHILD) + break; + + if (p < 0) { + pr_err("wait()"); + return KSFT_FAIL; + } + + if (!WIFEXITED(status)) { + ret = KSFT_FAIL; + continue; + } + + if (WEXITSTATUS(status) == KSFT_FAIL) + ret = KSFT_FAIL; + } + + return ret; +} + +typedef void (*print_res)(const char *, ...); + +static int check_results(void) +{ + struct test_result tr = {}; + struct xfrm_desc *d = &tr.desc; + int ret = KSFT_PASS; + + while (1) { + ssize_t received = read(results_fd[0], &tr, sizeof(tr)); + print_res result; + + if (received == 0) /* EOF */ + break; + + if (received != sizeof(tr)) { + pr_err("read() returned %zd", received); + return KSFT_FAIL; + } + + switch (tr.res) { + case KSFT_PASS: + result = ksft_test_result_pass; + break; + case KSFT_FAIL: + default: + result = ksft_test_result_fail; + ret = KSFT_FAIL; + } + + result(" %s: [%u, '%s', '%s', '%s', '%s', %u]\n", + desc_name[d->type], (unsigned int)d->proto, d->a_algo, + d->e_algo, d->c_algo, d->ae_algo, d->icv_len); + } + + return ret; +} + +int main(int argc, char **argv) +{ + unsigned int nr_process = 1; + int route_sock = -1, ret = KSFT_SKIP; + int test_desc_fd[2]; + uint32_t route_seq; + unsigned int i; + + if (argc > 2) + exit_usage(argv); + + if (argc > 1) { + char *endptr; + + errno = 0; + nr_process = strtol(argv[1], &endptr, 10); + if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN)) + || (errno != 0 && nr_process == 0) + || (endptr == argv[1]) || (*endptr != '\0')) { + printk("Failed to parse [nr_process]"); + exit_usage(argv); + } + + if (nr_process > MAX_PROCESSES || !nr_process) { + printk("nr_process should be between [1; %u]", + MAX_PROCESSES); + exit_usage(argv); + } + } + + srand(time(NULL)); + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 1) + ksft_exit_skip("sysconf(): %m\n"); + + if (pipe2(test_desc_fd, O_DIRECT) < 0) + ksft_exit_skip("pipe(): %m\n"); + + if (pipe2(results_fd, O_DIRECT) < 0) + ksft_exit_skip("pipe(): %m\n"); + + if (init_namespaces()) + ksft_exit_skip("Failed to create namespaces\n"); + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + ksft_exit_skip("Failed to open netlink route socket\n"); + + for (i = 0; i < nr_process; i++) { + char veth[VETH_LEN]; + + snprintf(veth, VETH_LEN, VETH_FMT, i); + + if (veth_add(route_sock, route_seq++, veth, nsfd_childa, veth, nsfd_childb)) { + close(route_sock); + ksft_exit_fail_msg("Failed to create veth device"); + } + + if (start_child(i, veth, test_desc_fd)) { + close(route_sock); + ksft_exit_fail_msg("Child %u failed to start", i); + } + } + + if (close(route_sock) || close(test_desc_fd[0]) || close(results_fd[1])) + ksft_exit_fail_msg("close(): %m"); + + ksft_set_plan(proto_plan + compat_plan); + + if (write_test_plan(test_desc_fd[1])) + ksft_exit_fail_msg("Failed to write test plan to pipe"); + + ret = check_results(); + + if (children_cleanup() == KSFT_FAIL) + exit(KSFT_FAIL); + + exit(ret); +} |