54 files changed, 4962 insertions, 3659 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 031127139f3b..7f1399ac028e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8193,6 +8193,16 @@ S:	Maintained
 F:	net/l3mdev
 F:	include/net/l3mdev.h
 
+L7 BPF FRAMEWORK
+M:	John Fastabend <john.fastabend@gmail.com>
+M:	Daniel Borkmann <daniel@iogearbox.net>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	include/linux/skmsg.h
+F:	net/core/skmsg.c
+F:	net/core/sock_map.c
+F:	net/ipv4/tcp_bpf.c
+
 LANTIQ / INTEL Ethernet drivers
 M:	Hauke Mehrtens <hauke@hauke-m.de>
 L:	netdev@vger.kernel.org
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9b558713447f..e60fff48288b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -737,33 +737,18 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 }
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
-#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
-struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
-struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
-int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
-int sockmap_get_from_fd(const union bpf_attr *attr, int type,
-			struct bpf_prog *prog);
+#if defined(CONFIG_BPF_STREAM_PARSER)
+int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which);
+int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 #else
-static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
-{
-	return NULL;
-}
-
-static inline struct sock  *__sock_hash_lookup_elem(struct bpf_map *map,
-						    void *key)
-{
-	return NULL;
-}
-
-static inline int sock_map_prog(struct bpf_map *map,
-				struct bpf_prog *prog,
-				u32 type)
+static inline int sock_map_prog_update(struct bpf_map *map,
+				       struct bpf_prog *prog, u32 which)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type,
-				      struct bpf_prog *prog)
+static inline int sock_map_get_from_fd(const union bpf_attr *attr,
+				       struct bpf_prog *prog)
 {
 	return -EINVAL;
 }
@@ -839,6 +824,10 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
+extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
+extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
+extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
 
 extern const struct bpf_func_proto bpf_get_local_storage_proto;
 
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5432f4c9f50e..fa48343a5ea1 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -57,7 +57,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
 #ifdef CONFIG_NET
 BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
-#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
+#if defined(CONFIG_BPF_STREAM_PARSER)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6791a0ac0139..5771874bc01e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -520,24 +520,6 @@ struct bpf_skb_data_end {
 	void *data_end;
 };
 
-struct sk_msg_buff {
-	void *data;
-	void *data_end;
-	__u32 apply_bytes;
-	__u32 cork_bytes;
-	int sg_copybreak;
-	int sg_start;
-	int sg_curr;
-	int sg_end;
-	struct scatterlist sg_data[MAX_SKB_FRAGS];
-	bool sg_copy[MAX_SKB_FRAGS];
-	__u32 flags;
-	struct sock *sk_redir;
-	struct sock *sk;
-	struct sk_buff *skb;
-	struct list_head list;
-};
-
 struct bpf_redirect_info {
 	u32 ifindex;
 	u32 flags;
@@ -833,9 +815,6 @@ void xdp_do_flush_map(void);
 
 void bpf_warn_invalid_xdp_action(u32 act);
 
-struct sock *do_sk_redirect_map(struct sk_buff *skb);
-struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
-
 #ifdef CONFIG_INET
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 				  struct bpf_prog *prog, struct sk_buff *skb,
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
new file mode 100644
index 000000000000..0b919f0bc6d6
--- /dev/null
+++ b/include/linux/skmsg.h
@@ -0,0 +1,410 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#ifndef _LINUX_SKMSG_H
+#define _LINUX_SKMSG_H
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/strparser.h>
+
+#define MAX_MSG_FRAGS			MAX_SKB_FRAGS
+
+enum __sk_action {
+	__SK_DROP = 0,
+	__SK_PASS,
+	__SK_REDIRECT,
+	__SK_NONE,
+};
+
+struct sk_msg_sg {
+	u32				start;
+	u32				curr;
+	u32				end;
+	u32				size;
+	u32				copybreak;
+	bool				copy[MAX_MSG_FRAGS];
+	/* The extra element is used for chaining the front and back sections
+	 * when the list becomes partitioned (e.g. end < start). The crypto
+	 * APIs require the chaining.
+	 */
+	struct scatterlist		data[MAX_MSG_FRAGS + 1];
+};
+
+struct sk_msg {
+	struct sk_msg_sg		sg;
+	void				*data;
+	void				*data_end;
+	u32				apply_bytes;
+	u32				cork_bytes;
+	u32				flags;
+	struct sk_buff			*skb;
+	struct sock			*sk_redir;
+	struct sock			*sk;
+	struct list_head		list;
+};
+
+struct sk_psock_progs {
+	struct bpf_prog			*msg_parser;
+	struct bpf_prog			*skb_parser;
+	struct bpf_prog			*skb_verdict;
+};
+
+enum sk_psock_state_bits {
+	SK_PSOCK_TX_ENABLED,
+};
+
+struct sk_psock_link {
+	struct list_head		list;
+	struct bpf_map			*map;
+	void				*link_raw;
+};
+
+struct sk_psock_parser {
+	struct strparser		strp;
+	bool				enabled;
+	void (*saved_data_ready)(struct sock *sk);
+};
+
+struct sk_psock_work_state {
+	struct sk_buff			*skb;
+	u32				len;
+	u32				off;
+};
+
+struct sk_psock {
+	struct sock			*sk;
+	struct sock			*sk_redir;
+	u32				apply_bytes;
+	u32				cork_bytes;
+	u32				eval;
+	struct sk_msg			*cork;
+	struct sk_psock_progs		progs;
+	struct sk_psock_parser		parser;
+	struct sk_buff_head		ingress_skb;
+	struct list_head		ingress_msg;
+	unsigned long			state;
+	struct list_head		link;
+	spinlock_t			link_lock;
+	refcount_t			refcnt;
+	void (*saved_unhash)(struct sock *sk);
+	void (*saved_close)(struct sock *sk, long timeout);
+	void (*saved_write_space)(struct sock *sk);
+	struct proto			*sk_proto;
+	struct sk_psock_work_state	work_state;
+	struct work_struct		work;
+	union {
+		struct rcu_head		rcu;
+		struct work_struct	gc;
+	};
+};
+
+int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
+		 int elem_first_coalesce);
+int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
+		 u32 off, u32 len);
+void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len);
+int sk_msg_free(struct sock *sk, struct sk_msg *msg);
+int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg);
+void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes);
+void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
+				  u32 bytes);
+
+void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes);
+void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes);
+
+int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
+			      struct sk_msg *msg, u32 bytes);
+int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
+			     struct sk_msg *msg, u32 bytes);
+
+static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
+{
+	WARN_ON(i == msg->sg.end && bytes);
+}
+
+/* Consume @bytes from psock->apply_bytes, clamping the result at zero. */
+static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes)
+{
+	u32 budget = psock->apply_bytes;
+
+	if (!budget)
+		return;
+	psock->apply_bytes = budget < bytes ? 0 : budget - bytes;
+}
+
+/* Ring-index helpers: an index wraps between 0 and MAX_MSG_FRAGS - 1.
+ * Arguments are evaluated more than once; pass side-effect-free lvalues.
+ */
+#define sk_msg_iter_var_prev(var)				\
+	do {							\
+		(var) = (var) ? (var) - 1 : MAX_MSG_FRAGS - 1;	\
+	} while (0)
+
+#define sk_msg_iter_var_next(var)			\
+	do {						\
+		if (++(var) == MAX_MSG_FRAGS)		\
+			(var) = 0;			\
+	} while (0)
+
+/* Same stepping, applied to the sg.<which> index of the sk_msg @msg. */
+#define sk_msg_iter_prev(msg, which)			\
+	sk_msg_iter_var_prev((msg)->sg.which)
+
+#define sk_msg_iter_next(msg, which)			\
+	sk_msg_iter_var_next((msg)->sg.which)
+
+static inline void sk_msg_clear_meta(struct sk_msg *msg)
+{
+	memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy));
+}
+
+static inline void sk_msg_init(struct sk_msg *msg)
+{
+	BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != MAX_MSG_FRAGS);
+	memset(msg, 0, sizeof(*msg));
+	sg_init_marker(msg->sg.data, MAX_MSG_FRAGS);
+}
+
+static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
+			       int which, u32 size)
+{
+	dst->sg.data[which] = src->sg.data[which];
+	dst->sg.data[which].length  = size;
+	src->sg.data[which].length -= size;
+	src->sg.data[which].offset += size;
+}
+
+static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src)
+{
+	memcpy(dst, src, sizeof(*src));
+	sk_msg_init(src);
+}
+
+static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
+{
+	/* Distance from sg.start to sg.end; add one lap if end has wrapped. */
+	return msg->sg.end - msg->sg.start +
+	       (msg->sg.end < msg->sg.start ? MAX_MSG_FRAGS : 0);
+}
+
+static inline bool sk_msg_full(const struct sk_msg *msg)
+{
+	return (msg->sg.end == msg->sg.start) && msg->sg.size;
+}
+
+static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
+{
+	return &msg->sg.data[which];
+}
+
+static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
+{
+	return sg_page(sk_msg_elem(msg, which));
+}
+
+static inline bool sk_msg_to_ingress(const struct sk_msg *msg)
+{
+	return msg->flags & BPF_F_INGRESS;
+}
+
+/* Refresh msg->data/msg->data_end from the element at sg.start. Both are
+ * set to NULL when that element is flagged in sg.copy[]; otherwise they
+ * bound the element's bytes.
+ */
+static inline void sk_msg_compute_data_pointers(struct sk_msg *msg)
+{
+	struct scatterlist *first = sk_msg_elem(msg, msg->sg.start);
+	bool flagged = msg->sg.copy[msg->sg.start];
+
+	msg->data = flagged ? NULL : sg_virt(first);
+	msg->data_end = flagged ? NULL : msg->data + first->length;
+}
+
+/* Append @page at slot sg.end after get_page(); flags it in sg.copy[]. */
+static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
+				   u32 len, u32 offset)
+{
+	u32 slot = msg->sg.end;
+	struct scatterlist *sge = sk_msg_elem(msg, slot);
+
+	get_page(page);
+	sg_set_page(sge, page, len, offset);
+	sg_unmark_end(sge);
+	msg->sg.copy[slot] = true;
+	msg->sg.size += len;
+	sk_msg_iter_next(msg, end);
+}
+
+/* Set sg.copy[] to @copy_state from slot @i onward around the ring, stopping
+ * once sg.end is reached. Slot @i itself is always written. */
+static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state)
+{
+	do {
+		msg->sg.copy[i] = copy_state;
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
+}
+
+static inline void sk_msg_sg_copy_set(struct sk_msg *msg, u32 start)
+{
+	sk_msg_sg_copy(msg, start, true);
+}
+
+static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start)
+{
+	sk_msg_sg_copy(msg, start, false);
+}
+
+static inline struct sk_psock *sk_psock(const struct sock *sk)
+{
+	return rcu_dereference_sk_user_data(sk);
+}
+
+static inline bool sk_has_psock(struct sock *sk)
+{
+	return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg;
+}
+
+static inline void sk_psock_queue_msg(struct sk_psock *psock,
+				      struct sk_msg *msg)
+{
+	list_add_tail(&msg->list, &psock->ingress_msg);
+}
+
+static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
+{
+	return !psock || list_empty(&psock->ingress_msg);
+}
+
+static inline void sk_psock_report_error(struct sk_psock *psock, int err)
+{
+	struct sock *sk = psock->sk;
+
+	sk->sk_err = err;
+	sk->sk_error_report(sk);
+}
+
+struct sk_psock *sk_psock_init(struct sock *sk, int node);
+
+int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
+void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
+void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+
+int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
+			 struct sk_msg *msg);
+
+static inline struct sk_psock_link *sk_psock_init_link(void)
+{
+	return kzalloc(sizeof(struct sk_psock_link),
+		       GFP_ATOMIC | __GFP_NOWARN);
+}
+
+static inline void sk_psock_free_link(struct sk_psock_link *link)
+{
+	kfree(link);
+}
+
+struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
+#if defined(CONFIG_BPF_STREAM_PARSER)
+void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link);
+#else
+static inline void sk_psock_unlink(struct sock *sk,
+				   struct sk_psock_link *link)
+{
+}
+#endif
+
+void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
+
+static inline void sk_psock_cork_free(struct sk_psock *psock)
+{
+	if (!psock->cork)
+		return;
+	sk_msg_free(psock->sk, psock->cork);
+	kfree(psock->cork);
+	psock->cork = NULL;
+}
+
+static inline void sk_psock_update_proto(struct sock *sk,
+					 struct sk_psock *psock,
+					 struct proto *ops)
+{
+	psock->saved_unhash = sk->sk_prot->unhash;
+	psock->saved_close = sk->sk_prot->close;
+	psock->saved_write_space = sk->sk_write_space;
+
+	psock->sk_proto = sk->sk_prot;
+	sk->sk_prot = ops;
+}
+
+static inline void sk_psock_restore_proto(struct sock *sk,
+					  struct sk_psock *psock)
+{
+	if (!psock->sk_proto)
+		return;
+	sk->sk_prot = psock->sk_proto;
+	psock->sk_proto = NULL;
+}
+
+static inline void sk_psock_set_state(struct sk_psock *psock,
+				      enum sk_psock_state_bits bit)
+{
+	set_bit(bit, &psock->state);
+}
+
+static inline void sk_psock_clear_state(struct sk_psock *psock,
+					enum sk_psock_state_bits bit)
+{
+	clear_bit(bit, &psock->state);
+}
+
+static inline bool sk_psock_test_state(const struct sk_psock *psock,
+				       enum sk_psock_state_bits bit)
+{
+	return test_bit(bit, &psock->state);
+}
+
+static inline struct sk_psock *sk_psock_get(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (psock && !refcount_inc_not_zero(&psock->refcnt))
+		psock = NULL;
+	rcu_read_unlock();
+	return psock;
+}
+
+void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
+void sk_psock_destroy(struct rcu_head *rcu);
+void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
+
+static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
+{
+	if (refcount_dec_and_test(&psock->refcnt))
+		sk_psock_drop(sk, psock);
+}
+
+static inline void psock_set_prog(struct bpf_prog **pprog,
+				  struct bpf_prog *prog)
+{
+	prog = xchg(pprog, prog);
+	if (prog)
+		bpf_prog_put(prog);
+}
+
+static inline void psock_progs_drop(struct sk_psock_progs *progs)
+{
+	psock_set_prog(&progs->msg_parser, NULL);
+	psock_set_prog(&progs->skb_parser, NULL);
+	psock_set_prog(&progs->skb_verdict, NULL);
+}
+
+#endif /* _LINUX_SKMSG_H */
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6def0351bcc3..14b789a123e7 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -265,6 +265,11 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly;
 struct ipv6_bpf_stub {
 	int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 			  bool force_bind_address_no_port, bool with_lock);
+	struct sock *(*udp6_lib_lookup)(struct net *net,
+					const struct in6_addr *saddr, __be16 sport,
+					const struct in6_addr *daddr, __be16 dport,
+					int dif, int sdif, struct udp_table *tbl,
+					struct sk_buff *skb);
 };
 extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
 
diff --git a/include/net/sock.h b/include/net/sock.h
index cfaf261936c8..2440f8b407eb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2214,10 +2214,6 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
 
 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
 
-int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int sg_start, int *sg_curr, unsigned int *sg_size,
-		int first_coalesce);
-
 /*
  *	Default write policy as shown to user space via poll/select/SIGIO
  */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0d2929223c70..3600ae0f25c3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -858,6 +858,21 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
 }
 
+static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
+}
+
+static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.sk_redir;
+}
+
+static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
  * as TCP moves IP6CB into a different location in skb->cb[]
@@ -2057,7 +2072,6 @@ struct tcp_ulp_ops {
 int tcp_register_ulp(struct tcp_ulp_ops *type);
 void tcp_unregister_ulp(struct tcp_ulp_ops *type);
 int tcp_set_ulp(struct sock *sk, const char *name);
-int tcp_set_ulp_id(struct sock *sk, const int ulp);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
 
@@ -2065,6 +2079,18 @@ void tcp_cleanup_ulp(struct sock *sk);
 	__MODULE_INFO(alias, alias_userspace, name);		\
 	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
 
+struct sk_msg;
+struct sk_psock;
+
+int tcp_bpf_init(struct sock *sk);
+void tcp_bpf_reinit(struct sock *sk);
+int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
+			  int flags);
+int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		    int nonblock, int flags, int *addr_len);
+int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
+		      struct msghdr *msg, int len);
+
 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
  * program does not support the chosen operation or there is no BPF
diff --git a/include/net/tls.h b/include/net/tls.h
index 5e853835597e..bab5627ff5e3 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -39,6 +39,8 @@
 #include <linux/crypto.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
+#include <linux/skmsg.h>
+
 #include <net/tcp.h>
 #include <net/strparser.h>
 #include <crypto/aead.h>
@@ -103,15 +105,13 @@ struct tls_rec {
 	int tx_flags;
 	int inplace_crypto;
 
-	/* AAD | sg_plaintext_data | sg_tag */
-	struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS + 1];
-	/* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */
-	struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS + 1];
+	struct sk_msg msg_plaintext;
+	struct sk_msg msg_encrypted;
 
-	unsigned int sg_plaintext_size;
-	unsigned int sg_encrypted_size;
-	int sg_plaintext_num_elem;
-	int sg_encrypted_num_elem;
+	/* AAD | msg_plaintext.sg.data | sg_tag */
+	struct scatterlist sg_aead_in[2];
+	/* AAD | msg_encrypted.sg.data (data contains overhead for hdr & iv & tag) */
+	struct scatterlist sg_aead_out[2];
 
 	char aad_space[TLS_AAD_SPACE_SIZE];
 	struct aead_request aead_req;
@@ -142,8 +142,7 @@ struct tls_sw_context_rx {
 
 	struct strparser strp;
 	void (*saved_data_ready)(struct sock *sk);
-	unsigned int (*sk_poll)(struct file *file, struct socket *sock,
-				struct poll_table_struct *wait);
+
 	struct sk_buff *recv_pkt;
 	u8 control;
 	bool decrypted;
@@ -223,8 +222,8 @@ struct tls_context {
 
 	unsigned long flags;
 	bool in_tcp_sendpages;
+	bool pending_open_record_frags;
 
-	u16 pending_open_record_frags;
 	int (*push_pending_record)(struct sock *sk, int flags);
 
 	void (*sk_write_space)(struct sock *sk);
@@ -272,8 +271,7 @@ void tls_sw_free_resources_rx(struct sock *sk);
 void tls_sw_release_resources_rx(struct sock *sk);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		   int nonblock, int flags, int *addr_len);
-unsigned int tls_sw_poll(struct file *file, struct socket *sock,
-			 struct poll_table_struct *wait);
+bool tls_sw_stream_read(const struct sock *sk);
 ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
 			   struct pipe_inode_info *pipe,
 			   size_t len, unsigned int flags);
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0488b8258321..ff8262626b8f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -13,11 +13,6 @@ ifeq ($(CONFIG_XDP_SOCKETS),y)
 obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
 endif
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
-ifeq ($(CONFIG_STREAM_PARSER),y)
-ifeq ($(CONFIG_INET),y)
-obj-$(CONFIG_BPF_SYSCALL) += sockmap.o
-endif
-endif
 endif
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index dded84cbe814..24583da9ffd1 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -449,7 +449,7 @@ static void fd_array_map_free(struct bpf_map *map)
 
 static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
 {
-	return NULL;
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 /* only called from syscall */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 138f0302692e..378cef70341c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2114,6 +2114,9 @@ static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
 
 	hdr = &btf->hdr;
 
+	if (hdr->hdr_len != hdr_len)
+		return -EINVAL;
+
 	btf_verifier_log_hdr(env, btf_data_size);
 
 	if (hdr->magic != BTF_MAGIC) {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3f5bf1af0826..defcf4df6d91 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1792,8 +1792,6 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
-const struct bpf_func_proto bpf_sock_map_update_proto __weak;
-const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
deleted file mode 100644
index d37a1a0a6e1e..000000000000
--- a/kernel/bpf/sockmap.c
+++ /dev/null
@@ -1,2629 +0,0 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-
-/* A BPF sock_map is used to store sock objects. This is primarly used
- * for doing socket redirect with BPF helper routines.
- *
- * A sock map may have BPF programs attached to it, currently a program
- * used to parse packets and a program to provide a verdict and redirect
- * decision on the packet are supported. Any programs attached to a sock
- * map are inherited by sock objects when they are added to the map. If
- * no BPF programs are attached the sock object may only be used for sock
- * redirect.
- *
- * A sock object may be in multiple maps, but can only inherit a single
- * parse or verdict program. If adding a sock object to a map would result
- * in having multiple parsing programs the update will return an EBUSY error.
- *
- * For reference this program is similar to devmap used in XDP context
- * reviewing these together may be useful. For an example please review
- * ./samples/bpf/sockmap/.
- */
-#include <linux/bpf.h>
-#include <net/sock.h>
-#include <linux/filter.h>
-#include <linux/errno.h>
-#include <linux/file.h>
-#include <linux/kernel.h>
-#include <linux/net.h>
-#include <linux/skbuff.h>
-#include <linux/workqueue.h>
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <net/strparser.h>
-#include <net/tcp.h>
-#include <linux/ptr_ring.h>
-#include <net/inet_common.h>
-#include <linux/sched/signal.h>
-
-#define SOCK_CREATE_FLAG_MASK \
-	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
-
-struct bpf_sock_progs {
-	struct bpf_prog *bpf_tx_msg;
-	struct bpf_prog *bpf_parse;
-	struct bpf_prog *bpf_verdict;
-};
-
-struct bpf_stab {
-	struct bpf_map map;
-	struct sock **sock_map;
-	struct bpf_sock_progs progs;
-	raw_spinlock_t lock;
-};
-
-struct bucket {
-	struct hlist_head head;
-	raw_spinlock_t lock;
-};
-
-struct bpf_htab {
-	struct bpf_map map;
-	struct bucket *buckets;
-	atomic_t count;
-	u32 n_buckets;
-	u32 elem_size;
-	struct bpf_sock_progs progs;
-	struct rcu_head rcu;
-};
-
-struct htab_elem {
-	struct rcu_head rcu;
-	struct hlist_node hash_node;
-	u32 hash;
-	struct sock *sk;
-	char key[0];
-};
-
-enum smap_psock_state {
-	SMAP_TX_RUNNING,
-};
-
-struct smap_psock_map_entry {
-	struct list_head list;
-	struct bpf_map *map;
-	struct sock **entry;
-	struct htab_elem __rcu *hash_link;
-};
-
-struct smap_psock {
-	struct rcu_head	rcu;
-	refcount_t refcnt;
-
-	/* datapath variables */
-	struct sk_buff_head rxqueue;
-	bool strp_enabled;
-
-	/* datapath error path cache across tx work invocations */
-	int save_rem;
-	int save_off;
-	struct sk_buff *save_skb;
-
-	/* datapath variables for tx_msg ULP */
-	struct sock *sk_redir;
-	int apply_bytes;
-	int cork_bytes;
-	int sg_size;
-	int eval;
-	struct sk_msg_buff *cork;
-	struct list_head ingress;
-
-	struct strparser strp;
-	struct bpf_prog *bpf_tx_msg;
-	struct bpf_prog *bpf_parse;
-	struct bpf_prog *bpf_verdict;
-	struct list_head maps;
-	spinlock_t maps_lock;
-
-	/* Back reference used when sock callback trigger sockmap operations */
-	struct sock *sock;
-	unsigned long state;
-
-	struct work_struct tx_work;
-	struct work_struct gc_work;
-
-	struct proto *sk_proto;
-	void (*save_unhash)(struct sock *sk);
-	void (*save_close)(struct sock *sk, long timeout);
-	void (*save_data_ready)(struct sock *sk);
-	void (*save_write_space)(struct sock *sk);
-};
-
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
-			   int nonblock, int flags, int *addr_len);
-static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
-static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
-			    int offset, size_t size, int flags);
-static void bpf_tcp_unhash(struct sock *sk);
-static void bpf_tcp_close(struct sock *sk, long timeout);
-
-static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
-{
-	return rcu_dereference_sk_user_data(sk);
-}
-
-static bool bpf_tcp_stream_read(const struct sock *sk)
-{
-	struct smap_psock *psock;
-	bool empty = true;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock))
-		goto out;
-	empty = list_empty(&psock->ingress);
-out:
-	rcu_read_unlock();
-	return !empty;
-}
-
-enum {
-	SOCKMAP_IPV4,
-	SOCKMAP_IPV6,
-	SOCKMAP_NUM_PROTS,
-};
-
-enum {
-	SOCKMAP_BASE,
-	SOCKMAP_TX,
-	SOCKMAP_NUM_CONFIGS,
-};
-
-static struct proto *saved_tcpv6_prot __read_mostly;
-static DEFINE_SPINLOCK(tcpv6_prot_lock);
-static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
-static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
-			 struct proto *base)
-{
-	prot[SOCKMAP_BASE]			= *base;
-	prot[SOCKMAP_BASE].unhash		= bpf_tcp_unhash;
-	prot[SOCKMAP_BASE].close		= bpf_tcp_close;
-	prot[SOCKMAP_BASE].recvmsg		= bpf_tcp_recvmsg;
-	prot[SOCKMAP_BASE].stream_memory_read	= bpf_tcp_stream_read;
-
-	prot[SOCKMAP_TX]			= prot[SOCKMAP_BASE];
-	prot[SOCKMAP_TX].sendmsg		= bpf_tcp_sendmsg;
-	prot[SOCKMAP_TX].sendpage		= bpf_tcp_sendpage;
-}
-
-static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
-{
-	int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
-	int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;
-
-	sk->sk_prot = &bpf_tcp_prots[family][conf];
-}
-
-static int bpf_tcp_init(struct sock *sk)
-{
-	struct smap_psock *psock;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		return -EINVAL;
-	}
-
-	if (unlikely(psock->sk_proto)) {
-		rcu_read_unlock();
-		return -EBUSY;
-	}
-
-	psock->save_unhash = sk->sk_prot->unhash;
-	psock->save_close = sk->sk_prot->close;
-	psock->sk_proto = sk->sk_prot;
-
-	/* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
-	if (sk->sk_family == AF_INET6 &&
-	    unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
-		spin_lock_bh(&tcpv6_prot_lock);
-		if (likely(sk->sk_prot != saved_tcpv6_prot)) {
-			build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
-			smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
-		}
-		spin_unlock_bh(&tcpv6_prot_lock);
-	}
-	update_sk_prot(sk, psock);
-	rcu_read_unlock();
-	return 0;
-}
-
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge);
-
-static void bpf_tcp_release(struct sock *sk)
-{
-	struct smap_psock *psock;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock))
-		goto out;
-
-	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork, true);
-		kfree(psock->cork);
-		psock->cork = NULL;
-	}
-
-	if (psock->sk_proto) {
-		sk->sk_prot = psock->sk_proto;
-		psock->sk_proto = NULL;
-	}
-out:
-	rcu_read_unlock();
-}
-
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
-					 u32 hash, void *key, u32 key_size)
-{
-	struct htab_elem *l;
-
-	hlist_for_each_entry_rcu(l, head, hash_node) {
-		if (l->hash == hash && !memcmp(&l->key, key, key_size))
-			return l;
-	}
-
-	return NULL;
-}
-
-static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
-{
-	return &htab->buckets[hash & (htab->n_buckets - 1)];
-}
-
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
-{
-	return &__select_bucket(htab, hash)->head;
-}
-
-static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
-{
-	atomic_dec(&htab->count);
-	kfree_rcu(l, rcu);
-}
-
-static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
-						  struct smap_psock *psock)
-{
-	struct smap_psock_map_entry *e;
-
-	spin_lock_bh(&psock->maps_lock);
-	e = list_first_entry_or_null(&psock->maps,
-				     struct smap_psock_map_entry,
-				     list);
-	if (e)
-		list_del(&e->list);
-	spin_unlock_bh(&psock->maps_lock);
-	return e;
-}
-
-static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock)
-{
-	struct smap_psock_map_entry *e;
-	struct sk_msg_buff *md, *mtmp;
-	struct sock *osk;
-
-	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork, true);
-		kfree(psock->cork);
-		psock->cork = NULL;
-	}
-
-	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
-		list_del(&md->list);
-		free_start_sg(psock->sock, md, true);
-		kfree(md);
-	}
-
-	e = psock_map_pop(sk, psock);
-	while (e) {
-		if (e->entry) {
-			struct bpf_stab *stab = container_of(e->map, struct bpf_stab, map);
-
-			raw_spin_lock_bh(&stab->lock);
-			osk = *e->entry;
-			if (osk == sk) {
-				*e->entry = NULL;
-				smap_release_sock(psock, sk);
-			}
-			raw_spin_unlock_bh(&stab->lock);
-		} else {
-			struct htab_elem *link = rcu_dereference(e->hash_link);
-			struct bpf_htab *htab = container_of(e->map, struct bpf_htab, map);
-			struct hlist_head *head;
-			struct htab_elem *l;
-			struct bucket *b;
-
-			b = __select_bucket(htab, link->hash);
-			head = &b->head;
-			raw_spin_lock_bh(&b->lock);
-			l = lookup_elem_raw(head,
-					    link->hash, link->key,
-					    htab->map.key_size);
-			/* If another thread deleted this object skip deletion.
-			 * The refcnt on psock may or may not be zero.
-			 */
-			if (l && l == link) {
-				hlist_del_rcu(&link->hash_node);
-				smap_release_sock(psock, link->sk);
-				free_htab_elem(htab, link);
-			}
-			raw_spin_unlock_bh(&b->lock);
-		}
-		kfree(e);
-		e = psock_map_pop(sk, psock);
-	}
-}
-
-static void bpf_tcp_unhash(struct sock *sk)
-{
-	void (*unhash_fun)(struct sock *sk);
-	struct smap_psock *psock;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		if (sk->sk_prot->unhash)
-			sk->sk_prot->unhash(sk);
-		return;
-	}
-	unhash_fun = psock->save_unhash;
-	bpf_tcp_remove(sk, psock);
-	rcu_read_unlock();
-	unhash_fun(sk);
-}
-
-static void bpf_tcp_close(struct sock *sk, long timeout)
-{
-	void (*close_fun)(struct sock *sk, long timeout);
-	struct smap_psock *psock;
-
-	lock_sock(sk);
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		release_sock(sk);
-		return sk->sk_prot->close(sk, timeout);
-	}
-	close_fun = psock->save_close;
-	bpf_tcp_remove(sk, psock);
-	rcu_read_unlock();
-	release_sock(sk);
-	close_fun(sk, timeout);
-}
-
-enum __sk_action {
-	__SK_DROP = 0,
-	__SK_PASS,
-	__SK_REDIRECT,
-	__SK_NONE,
-};
-
-static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
-	.name		= "bpf_tcp",
-	.uid		= TCP_ULP_BPF,
-	.user_visible	= false,
-	.owner		= NULL,
-	.init		= bpf_tcp_init,
-	.release	= bpf_tcp_release,
-};
-
-static int memcopy_from_iter(struct sock *sk,
-			     struct sk_msg_buff *md,
-			     struct iov_iter *from, int bytes)
-{
-	struct scatterlist *sg = md->sg_data;
-	int i = md->sg_curr, rc = -ENOSPC;
-
-	do {
-		int copy;
-		char *to;
-
-		if (md->sg_copybreak >= sg[i].length) {
-			md->sg_copybreak = 0;
-
-			if (++i == MAX_SKB_FRAGS)
-				i = 0;
-
-			if (i == md->sg_end)
-				break;
-		}
-
-		copy = sg[i].length - md->sg_copybreak;
-		to = sg_virt(&sg[i]) + md->sg_copybreak;
-		md->sg_copybreak += copy;
-
-		if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
-			rc = copy_from_iter_nocache(to, copy, from);
-		else
-			rc = copy_from_iter(to, copy, from);
-
-		if (rc != copy) {
-			rc = -EFAULT;
-			goto out;
-		}
-
-		bytes -= copy;
-		if (!bytes)
-			break;
-
-		md->sg_copybreak = 0;
-		if (++i == MAX_SKB_FRAGS)
-			i = 0;
-	} while (i != md->sg_end);
-out:
-	md->sg_curr = i;
-	return rc;
-}
-
-static int bpf_tcp_push(struct sock *sk, int apply_bytes,
-			struct sk_msg_buff *md,
-			int flags, bool uncharge)
-{
-	bool apply = apply_bytes;
-	struct scatterlist *sg;
-	int offset, ret = 0;
-	struct page *p;
-	size_t size;
-
-	while (1) {
-		sg = md->sg_data + md->sg_start;
-		size = (apply && apply_bytes < sg->length) ?
-			apply_bytes : sg->length;
-		offset = sg->offset;
-
-		tcp_rate_check_app_limited(sk);
-		p = sg_page(sg);
-retry:
-		ret = do_tcp_sendpages(sk, p, offset, size, flags);
-		if (ret != size) {
-			if (ret > 0) {
-				if (apply)
-					apply_bytes -= ret;
-
-				sg->offset += ret;
-				sg->length -= ret;
-				size -= ret;
-				offset += ret;
-				if (uncharge)
-					sk_mem_uncharge(sk, ret);
-				goto retry;
-			}
-
-			return ret;
-		}
-
-		if (apply)
-			apply_bytes -= ret;
-		sg->offset += ret;
-		sg->length -= ret;
-		if (uncharge)
-			sk_mem_uncharge(sk, ret);
-
-		if (!sg->length) {
-			put_page(p);
-			md->sg_start++;
-			if (md->sg_start == MAX_SKB_FRAGS)
-				md->sg_start = 0;
-			sg_init_table(sg, 1);
-
-			if (md->sg_start == md->sg_end)
-				break;
-		}
-
-		if (apply && !apply_bytes)
-			break;
-	}
-	return 0;
-}
-
-static inline void bpf_compute_data_pointers_sg(struct sk_msg_buff *md)
-{
-	struct scatterlist *sg = md->sg_data + md->sg_start;
-
-	if (md->sg_copy[md->sg_start]) {
-		md->data = md->data_end = 0;
-	} else {
-		md->data = sg_virt(sg);
-		md->data_end = md->data + sg->length;
-	}
-}
-
-static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
-{
-	struct scatterlist *sg = md->sg_data;
-	int i = md->sg_start;
-
-	do {
-		int uncharge = (bytes < sg[i].length) ? bytes : sg[i].length;
-
-		sk_mem_uncharge(sk, uncharge);
-		bytes -= uncharge;
-		if (!bytes)
-			break;
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
-	} while (i != md->sg_end);
-}
-
-static void free_bytes_sg(struct sock *sk, int bytes,
-			  struct sk_msg_buff *md, bool charge)
-{
-	struct scatterlist *sg = md->sg_data;
-	int i = md->sg_start, free;
-
-	while (bytes && sg[i].length) {
-		free = sg[i].length;
-		if (bytes < free) {
-			sg[i].length -= bytes;
-			sg[i].offset += bytes;
-			if (charge)
-				sk_mem_uncharge(sk, bytes);
-			break;
-		}
-
-		if (charge)
-			sk_mem_uncharge(sk, sg[i].length);
-		put_page(sg_page(&sg[i]));
-		bytes -= sg[i].length;
-		sg[i].length = 0;
-		sg[i].page_link = 0;
-		sg[i].offset = 0;
-		i++;
-
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
-	}
-	md->sg_start = i;
-}
-
-static int free_sg(struct sock *sk, int start,
-		   struct sk_msg_buff *md, bool charge)
-{
-	struct scatterlist *sg = md->sg_data;
-	int i = start, free = 0;
-
-	while (sg[i].length) {
-		free += sg[i].length;
-		if (charge)
-			sk_mem_uncharge(sk, sg[i].length);
-		if (!md->skb)
-			put_page(sg_page(&sg[i]));
-		sg[i].length = 0;
-		sg[i].page_link = 0;
-		sg[i].offset = 0;
-		i++;
-
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
-	}
-	consume_skb(md->skb);
-
-	return free;
-}
-
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge)
-{
-	int free = free_sg(sk, md->sg_start, md, charge);
-
-	md->sg_start = md->sg_end;
-	return free;
-}
-
-static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
-{
-	return free_sg(sk, md->sg_curr, md, true);
-}
-
-static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
-{
-	return ((_rc == SK_PASS) ?
-	       (md->sk_redir ? __SK_REDIRECT : __SK_PASS) :
-	       __SK_DROP);
-}
-
-static unsigned int smap_do_tx_msg(struct sock *sk,
-				   struct smap_psock *psock,
-				   struct sk_msg_buff *md)
-{
-	struct bpf_prog *prog;
-	unsigned int rc, _rc;
-
-	preempt_disable();
-	rcu_read_lock();
-
-	/* If the policy was removed mid-send then default to 'accept' */
-	prog = READ_ONCE(psock->bpf_tx_msg);
-	if (unlikely(!prog)) {
-		_rc = SK_PASS;
-		goto verdict;
-	}
-
-	bpf_compute_data_pointers_sg(md);
-	md->sk = sk;
-	rc = (*prog->bpf_func)(md, prog->insnsi);
-	psock->apply_bytes = md->apply_bytes;
-
-	/* Moving return codes from UAPI namespace into internal namespace */
-	_rc = bpf_map_msg_verdict(rc, md);
-
-	/* The psock has a refcount on the sock but not on the map and because
-	 * we need to drop rcu read lock here its possible the map could be
-	 * removed between here and when we need it to execute the sock
-	 * redirect. So do the map lookup now for future use.
-	 */
-	if (_rc == __SK_REDIRECT) {
-		if (psock->sk_redir)
-			sock_put(psock->sk_redir);
-		psock->sk_redir = do_msg_redirect_map(md);
-		if (!psock->sk_redir) {
-			_rc = __SK_DROP;
-			goto verdict;
-		}
-		sock_hold(psock->sk_redir);
-	}
-verdict:
-	rcu_read_unlock();
-	preempt_enable();
-
-	return _rc;
-}
-
-static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
-			   struct smap_psock *psock,
-			   struct sk_msg_buff *md, int flags)
-{
-	bool apply = apply_bytes;
-	size_t size, copied = 0;
-	struct sk_msg_buff *r;
-	int err = 0, i;
-
-	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_KERNEL);
-	if (unlikely(!r))
-		return -ENOMEM;
-
-	lock_sock(sk);
-	r->sg_start = md->sg_start;
-	i = md->sg_start;
-
-	do {
-		size = (apply && apply_bytes < md->sg_data[i].length) ?
-			apply_bytes : md->sg_data[i].length;
-
-		if (!sk_wmem_schedule(sk, size)) {
-			if (!copied)
-				err = -ENOMEM;
-			break;
-		}
-
-		sk_mem_charge(sk, size);
-		r->sg_data[i] = md->sg_data[i];
-		r->sg_data[i].length = size;
-		md->sg_data[i].length -= size;
-		md->sg_data[i].offset += size;
-		copied += size;
-
-		if (md->sg_data[i].length) {
-			get_page(sg_page(&r->sg_data[i]));
-			r->sg_end = (i + 1) == MAX_SKB_FRAGS ? 0 : i + 1;
-		} else {
-			i++;
-			if (i == MAX_SKB_FRAGS)
-				i = 0;
-			r->sg_end = i;
-		}
-
-		if (apply) {
-			apply_bytes -= size;
-			if (!apply_bytes)
-				break;
-		}
-	} while (i != md->sg_end);
-
-	md->sg_start = i;
-
-	if (!err) {
-		list_add_tail(&r->list, &psock->ingress);
-		sk->sk_data_ready(sk);
-	} else {
-		free_start_sg(sk, r, true);
-		kfree(r);
-	}
-
-	release_sock(sk);
-	return err;
-}
-
-static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
-				       struct sk_msg_buff *md,
-				       int flags)
-{
-	bool ingress = !!(md->flags & BPF_F_INGRESS);
-	struct smap_psock *psock;
-	int err = 0;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock))
-		goto out_rcu;
-
-	if (!refcount_inc_not_zero(&psock->refcnt))
-		goto out_rcu;
-
-	rcu_read_unlock();
-
-	if (ingress) {
-		err = bpf_tcp_ingress(sk, send, psock, md, flags);
-	} else {
-		lock_sock(sk);
-		err = bpf_tcp_push(sk, send, md, flags, false);
-		release_sock(sk);
-	}
-	smap_release_sock(psock, sk);
-	return err;
-out_rcu:
-	rcu_read_unlock();
-	return 0;
-}
-
-static inline void bpf_md_init(struct smap_psock *psock)
-{
-	if (!psock->apply_bytes) {
-		psock->eval =  __SK_NONE;
-		if (psock->sk_redir) {
-			sock_put(psock->sk_redir);
-			psock->sk_redir = NULL;
-		}
-	}
-}
-
-static void apply_bytes_dec(struct smap_psock *psock, int i)
-{
-	if (psock->apply_bytes) {
-		if (psock->apply_bytes < i)
-			psock->apply_bytes = 0;
-		else
-			psock->apply_bytes -= i;
-	}
-}
-
-static int bpf_exec_tx_verdict(struct smap_psock *psock,
-			       struct sk_msg_buff *m,
-			       struct sock *sk,
-			       int *copied, int flags)
-{
-	bool cork = false, enospc = (m->sg_start == m->sg_end);
-	struct sock *redir;
-	int err = 0;
-	int send;
-
-more_data:
-	if (psock->eval == __SK_NONE)
-		psock->eval = smap_do_tx_msg(sk, psock, m);
-
-	if (m->cork_bytes &&
-	    m->cork_bytes > psock->sg_size && !enospc) {
-		psock->cork_bytes = m->cork_bytes - psock->sg_size;
-		if (!psock->cork) {
-			psock->cork = kcalloc(1,
-					sizeof(struct sk_msg_buff),
-					GFP_ATOMIC | __GFP_NOWARN);
-
-			if (!psock->cork) {
-				err = -ENOMEM;
-				goto out_err;
-			}
-		}
-		memcpy(psock->cork, m, sizeof(*m));
-		goto out_err;
-	}
-
-	send = psock->sg_size;
-	if (psock->apply_bytes && psock->apply_bytes < send)
-		send = psock->apply_bytes;
-
-	switch (psock->eval) {
-	case __SK_PASS:
-		err = bpf_tcp_push(sk, send, m, flags, true);
-		if (unlikely(err)) {
-			*copied -= free_start_sg(sk, m, true);
-			break;
-		}
-
-		apply_bytes_dec(psock, send);
-		psock->sg_size -= send;
-		break;
-	case __SK_REDIRECT:
-		redir = psock->sk_redir;
-		apply_bytes_dec(psock, send);
-
-		if (psock->cork) {
-			cork = true;
-			psock->cork = NULL;
-		}
-
-		return_mem_sg(sk, send, m);
-		release_sock(sk);
-
-		err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
-		lock_sock(sk);
-
-		if (unlikely(err < 0)) {
-			int free = free_start_sg(sk, m, false);
-
-			psock->sg_size = 0;
-			if (!cork)
-				*copied -= free;
-		} else {
-			psock->sg_size -= send;
-		}
-
-		if (cork) {
-			free_start_sg(sk, m, true);
-			psock->sg_size = 0;
-			kfree(m);
-			m = NULL;
-			err = 0;
-		}
-		break;
-	case __SK_DROP:
-	default:
-		free_bytes_sg(sk, send, m, true);
-		apply_bytes_dec(psock, send);
-		*copied -= send;
-		psock->sg_size -= send;
-		err = -EACCES;
-		break;
-	}
-
-	if (likely(!err)) {
-		bpf_md_init(psock);
-		if (m &&
-		    m->sg_data[m->sg_start].page_link &&
-		    m->sg_data[m->sg_start].length)
-			goto more_data;
-	}
-
-out_err:
-	return err;
-}
-
-static int bpf_wait_data(struct sock *sk,
-			 struct smap_psock *psk, int flags,
-			 long timeo, int *err)
-{
-	int rc;
-
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	rc = sk_wait_event(sk, &timeo,
-			   !list_empty(&psk->ingress) ||
-			   !skb_queue_empty(&sk->sk_receive_queue),
-			   &wait);
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	return rc;
-}
-
-static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
-			   int nonblock, int flags, int *addr_len)
-{
-	struct iov_iter *iter = &msg->msg_iter;
-	struct smap_psock *psock;
-	int copied = 0;
-
-	if (unlikely(flags & MSG_ERRQUEUE))
-		return inet_recv_error(sk, msg, len, addr_len);
-	if (!skb_queue_empty(&sk->sk_receive_queue))
-		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock))
-		goto out;
-
-	if (unlikely(!refcount_inc_not_zero(&psock->refcnt)))
-		goto out;
-	rcu_read_unlock();
-
-	lock_sock(sk);
-bytes_ready:
-	while (copied != len) {
-		struct scatterlist *sg;
-		struct sk_msg_buff *md;
-		int i;
-
-		md = list_first_entry_or_null(&psock->ingress,
-					      struct sk_msg_buff, list);
-		if (unlikely(!md))
-			break;
-		i = md->sg_start;
-		do {
-			struct page *page;
-			int n, copy;
-
-			sg = &md->sg_data[i];
-			copy = sg->length;
-			page = sg_page(sg);
-
-			if (copied + copy > len)
-				copy = len - copied;
-
-			n = copy_page_to_iter(page, sg->offset, copy, iter);
-			if (n != copy) {
-				md->sg_start = i;
-				release_sock(sk);
-				smap_release_sock(psock, sk);
-				return -EFAULT;
-			}
-
-			copied += copy;
-			sg->offset += copy;
-			sg->length -= copy;
-			sk_mem_uncharge(sk, copy);
-
-			if (!sg->length) {
-				i++;
-				if (i == MAX_SKB_FRAGS)
-					i = 0;
-				if (!md->skb)
-					put_page(page);
-			}
-			if (copied == len)
-				break;
-		} while (i != md->sg_end);
-		md->sg_start = i;
-
-		if (!sg->length && md->sg_start == md->sg_end) {
-			list_del(&md->list);
-			consume_skb(md->skb);
-			kfree(md);
-		}
-	}
-
-	if (!copied) {
-		long timeo;
-		int data;
-		int err = 0;
-
-		timeo = sock_rcvtimeo(sk, nonblock);
-		data = bpf_wait_data(sk, psock, flags, timeo, &err);
-
-		if (data) {
-			if (!skb_queue_empty(&sk->sk_receive_queue)) {
-				release_sock(sk);
-				smap_release_sock(psock, sk);
-				copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
-				return copied;
-			}
-			goto bytes_ready;
-		}
-
-		if (err)
-			copied = err;
-	}
-
-	release_sock(sk);
-	smap_release_sock(psock, sk);
-	return copied;
-out:
-	rcu_read_unlock();
-	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
-}
-
-
-static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
-{
-	int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
-	struct sk_msg_buff md = {0};
-	unsigned int sg_copy = 0;
-	struct smap_psock *psock;
-	int copied = 0, err = 0;
-	struct scatterlist *sg;
-	long timeo;
-
-	/* Its possible a sock event or user removed the psock _but_ the ops
-	 * have not been reprogrammed yet so we get here. In this case fallback
-	 * to tcp_sendmsg. Note this only works because we _only_ ever allow
-	 * a single ULP there is no hierarchy here.
-	 */
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		return tcp_sendmsg(sk, msg, size);
-	}
-
-	/* Increment the psock refcnt to ensure its not released while sending a
-	 * message. Required because sk lookup and bpf programs are used in
-	 * separate rcu critical sections. Its OK if we lose the map entry
-	 * but we can't lose the sock reference.
-	 */
-	if (!refcount_inc_not_zero(&psock->refcnt)) {
-		rcu_read_unlock();
-		return tcp_sendmsg(sk, msg, size);
-	}
-
-	sg = md.sg_data;
-	sg_init_marker(sg, MAX_SKB_FRAGS);
-	rcu_read_unlock();
-
-	lock_sock(sk);
-	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-
-	while (msg_data_left(msg)) {
-		struct sk_msg_buff *m = NULL;
-		bool enospc = false;
-		int copy;
-
-		if (sk->sk_err) {
-			err = -sk->sk_err;
-			goto out_err;
-		}
-
-		copy = msg_data_left(msg);
-		if (!sk_stream_memory_free(sk))
-			goto wait_for_sndbuf;
-
-		m = psock->cork_bytes ? psock->cork : &md;
-		m->sg_curr = m->sg_copybreak ? m->sg_curr : m->sg_end;
-		err = sk_alloc_sg(sk, copy, m->sg_data,
-				  m->sg_start, &m->sg_end, &sg_copy,
-				  m->sg_end - 1);
-		if (err) {
-			if (err != -ENOSPC)
-				goto wait_for_memory;
-			enospc = true;
-			copy = sg_copy;
-		}
-
-		err = memcopy_from_iter(sk, m, &msg->msg_iter, copy);
-		if (err < 0) {
-			free_curr_sg(sk, m);
-			goto out_err;
-		}
-
-		psock->sg_size += copy;
-		copied += copy;
-		sg_copy = 0;
-
-		/* When bytes are being corked skip running BPF program and
-		 * applying verdict unless there is no more buffer space. In
-		 * the ENOSPC case simply run BPF prorgram with currently
-		 * accumulated data. We don't have much choice at this point
-		 * we could try extending the page frags or chaining complex
-		 * frags but even in these cases _eventually_ we will hit an
-		 * OOM scenario. More complex recovery schemes may be
-		 * implemented in the future, but BPF programs must handle
-		 * the case where apply_cork requests are not honored. The
-		 * canonical method to verify this is to check data length.
-		 */
-		if (psock->cork_bytes) {
-			if (copy > psock->cork_bytes)
-				psock->cork_bytes = 0;
-			else
-				psock->cork_bytes -= copy;
-
-			if (psock->cork_bytes && !enospc)
-				goto out_cork;
-
-			/* All cork bytes accounted for re-run filter */
-			psock->eval = __SK_NONE;
-			psock->cork_bytes = 0;
-		}
-
-		err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
-		if (unlikely(err < 0))
-			goto out_err;
-		continue;
-wait_for_sndbuf:
-		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-wait_for_memory:
-		err = sk_stream_wait_memory(sk, &timeo);
-		if (err) {
-			if (m && m != psock->cork)
-				free_start_sg(sk, m, true);
-			goto out_err;
-		}
-	}
-out_err:
-	if (err < 0)
-		err = sk_stream_error(sk, msg->msg_flags, err);
-out_cork:
-	release_sock(sk);
-	smap_release_sock(psock, sk);
-	return copied ? copied : err;
-}
-
-static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
-			    int offset, size_t size, int flags)
-{
-	struct sk_msg_buff md = {0}, *m = NULL;
-	int err = 0, copied = 0;
-	struct smap_psock *psock;
-	struct scatterlist *sg;
-	bool enospc = false;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock))
-		goto accept;
-
-	if (!refcount_inc_not_zero(&psock->refcnt))
-		goto accept;
-	rcu_read_unlock();
-
-	lock_sock(sk);
-
-	if (psock->cork_bytes) {
-		m = psock->cork;
-		sg = &m->sg_data[m->sg_end];
-	} else {
-		m = &md;
-		sg = m->sg_data;
-		sg_init_marker(sg, MAX_SKB_FRAGS);
-	}
-
-	/* Catch case where ring is full and sendpage is stalled. */
-	if (unlikely(m->sg_end == m->sg_start &&
-	    m->sg_data[m->sg_end].length))
-		goto out_err;
-
-	psock->sg_size += size;
-	sg_set_page(sg, page, size, offset);
-	get_page(page);
-	m->sg_copy[m->sg_end] = true;
-	sk_mem_charge(sk, size);
-	m->sg_end++;
-	copied = size;
-
-	if (m->sg_end == MAX_SKB_FRAGS)
-		m->sg_end = 0;
-
-	if (m->sg_end == m->sg_start)
-		enospc = true;
-
-	if (psock->cork_bytes) {
-		if (size > psock->cork_bytes)
-			psock->cork_bytes = 0;
-		else
-			psock->cork_bytes -= size;
-
-		if (psock->cork_bytes && !enospc)
-			goto out_err;
-
-		/* All cork bytes accounted for re-run filter */
-		psock->eval = __SK_NONE;
-		psock->cork_bytes = 0;
-	}
-
-	err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
-out_err:
-	release_sock(sk);
-	smap_release_sock(psock, sk);
-	return copied ? copied : err;
-accept:
-	rcu_read_unlock();
-	return tcp_sendpage(sk, page, offset, size, flags);
-}
-
-static void bpf_tcp_msg_add(struct smap_psock *psock,
-			    struct sock *sk,
-			    struct bpf_prog *tx_msg)
-{
-	struct bpf_prog *orig_tx_msg;
-
-	orig_tx_msg = xchg(&psock->bpf_tx_msg, tx_msg);
-	if (orig_tx_msg)
-		bpf_prog_put(orig_tx_msg);
-}
-
-static int bpf_tcp_ulp_register(void)
-{
-	build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
-	/* Once BPF TX ULP is registered it is never unregistered. It
-	 * will be in the ULP list for the lifetime of the system. Doing
-	 * duplicate registers is not a problem.
-	 */
-	return tcp_register_ulp(&bpf_tcp_ulp_ops);
-}
-
-static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
-{
-	struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict);
-	int rc;
-
-	if (unlikely(!prog))
-		return __SK_DROP;
-
-	skb_orphan(skb);
-	/* We need to ensure that BPF metadata for maps is also cleared
-	 * when we orphan the skb so that we don't have the possibility
-	 * to reference a stale map.
-	 */
-	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
-	skb->sk = psock->sock;
-	bpf_compute_data_end_sk_skb(skb);
-	preempt_disable();
-	rc = (*prog->bpf_func)(skb, prog->insnsi);
-	preempt_enable();
-	skb->sk = NULL;
-
-	/* Moving return codes from UAPI namespace into internal namespace */
-	return rc == SK_PASS ?
-		(TCP_SKB_CB(skb)->bpf.sk_redir ? __SK_REDIRECT : __SK_PASS) :
-		__SK_DROP;
-}
-
-static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
-{
-	struct sock *sk = psock->sock;
-	int copied = 0, num_sg;
-	struct sk_msg_buff *r;
-
-	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
-	if (unlikely(!r))
-		return -EAGAIN;
-
-	if (!sk_rmem_schedule(sk, skb, skb->len)) {
-		kfree(r);
-		return -EAGAIN;
-	}
-
-	sg_init_table(r->sg_data, MAX_SKB_FRAGS);
-	num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
-	if (unlikely(num_sg < 0)) {
-		kfree(r);
-		return num_sg;
-	}
-	sk_mem_charge(sk, skb->len);
-	copied = skb->len;
-	r->sg_start = 0;
-	r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
-	r->skb = skb;
-	list_add_tail(&r->list, &psock->ingress);
-	sk->sk_data_ready(sk);
-	return copied;
-}
-
-static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
-{
-	struct smap_psock *peer;
-	struct sock *sk;
-	__u32 in;
-	int rc;
-
-	rc = smap_verdict_func(psock, skb);
-	switch (rc) {
-	case __SK_REDIRECT:
-		sk = do_sk_redirect_map(skb);
-		if (!sk) {
-			kfree_skb(skb);
-			break;
-		}
-
-		peer = smap_psock_sk(sk);
-		in = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
-
-		if (unlikely(!peer || sock_flag(sk, SOCK_DEAD) ||
-			     !test_bit(SMAP_TX_RUNNING, &peer->state))) {
-			kfree_skb(skb);
-			break;
-		}
-
-		if (!in && sock_writeable(sk)) {
-			skb_set_owner_w(skb, sk);
-			skb_queue_tail(&peer->rxqueue, skb);
-			schedule_work(&peer->tx_work);
-			break;
-		} else if (in &&
-			   atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
-			skb_queue_tail(&peer->rxqueue, skb);
-			schedule_work(&peer->tx_work);
-			break;
-		}
-	/* Fall through and free skb otherwise */
-	case __SK_DROP:
-	default:
-		kfree_skb(skb);
-	}
-}
-
-static void smap_report_sk_error(struct smap_psock *psock, int err)
-{
-	struct sock *sk = psock->sock;
-
-	sk->sk_err = err;
-	sk->sk_error_report(sk);
-}
-
-static void smap_read_sock_strparser(struct strparser *strp,
-				     struct sk_buff *skb)
-{
-	struct smap_psock *psock;
-
-	rcu_read_lock();
-	psock = container_of(strp, struct smap_psock, strp);
-	smap_do_verdict(psock, skb);
-	rcu_read_unlock();
-}
-
-/* Called with lock held on socket */
-static void smap_data_ready(struct sock *sk)
-{
-	struct smap_psock *psock;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (likely(psock)) {
-		write_lock_bh(&sk->sk_callback_lock);
-		strp_data_ready(&psock->strp);
-		write_unlock_bh(&sk->sk_callback_lock);
-	}
-	rcu_read_unlock();
-}
-
-static void smap_tx_work(struct work_struct *w)
-{
-	struct smap_psock *psock;
-	struct sk_buff *skb;
-	int rem, off, n;
-
-	psock = container_of(w, struct smap_psock, tx_work);
-
-	/* lock sock to avoid losing sk_socket at some point during loop */
-	lock_sock(psock->sock);
-	if (psock->save_skb) {
-		skb = psock->save_skb;
-		rem = psock->save_rem;
-		off = psock->save_off;
-		psock->save_skb = NULL;
-		goto start;
-	}
-
-	while ((skb = skb_dequeue(&psock->rxqueue))) {
-		__u32 flags;
-
-		rem = skb->len;
-		off = 0;
-start:
-		flags = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
-		do {
-			if (likely(psock->sock->sk_socket)) {
-				if (flags)
-					n = smap_do_ingress(psock, skb);
-				else
-					n = skb_send_sock_locked(psock->sock,
-								 skb, off, rem);
-			} else {
-				n = -EINVAL;
-			}
-
-			if (n <= 0) {
-				if (n == -EAGAIN) {
-					/* Retry when space is available */
-					psock->save_skb = skb;
-					psock->save_rem = rem;
-					psock->save_off = off;
-					goto out;
-				}
-				/* Hard errors break pipe and stop xmit */
-				smap_report_sk_error(psock, n ? -n : EPIPE);
-				clear_bit(SMAP_TX_RUNNING, &psock->state);
-				kfree_skb(skb);
-				goto out;
-			}
-			rem -= n;
-			off += n;
-		} while (rem);
-
-		if (!flags)
-			kfree_skb(skb);
-	}
-out:
-	release_sock(psock->sock);
-}
-
-static void smap_write_space(struct sock *sk)
-{
-	struct smap_psock *psock;
-	void (*write_space)(struct sock *sk);
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (likely(psock && test_bit(SMAP_TX_RUNNING, &psock->state)))
-		schedule_work(&psock->tx_work);
-	write_space = psock->save_write_space;
-	rcu_read_unlock();
-	write_space(sk);
-}
-
-static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
-{
-	if (!psock->strp_enabled)
-		return;
-	sk->sk_data_ready = psock->save_data_ready;
-	sk->sk_write_space = psock->save_write_space;
-	psock->save_data_ready = NULL;
-	psock->save_write_space = NULL;
-	strp_stop(&psock->strp);
-	psock->strp_enabled = false;
-}
-
-static void smap_destroy_psock(struct rcu_head *rcu)
-{
-	struct smap_psock *psock = container_of(rcu,
-						  struct smap_psock, rcu);
-
-	/* Now that a grace period has passed there is no longer
-	 * any reference to this sock in the sockmap so we can
-	 * destroy the psock, strparser, and bpf programs. But,
-	 * because we use workqueue sync operations we can not
-	 * do it in rcu context
-	 */
-	schedule_work(&psock->gc_work);
-}
-
-static bool psock_is_smap_sk(struct sock *sk)
-{
-	return inet_csk(sk)->icsk_ulp_ops == &bpf_tcp_ulp_ops;
-}
-
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
-{
-	if (refcount_dec_and_test(&psock->refcnt)) {
-		if (psock_is_smap_sk(sock))
-			tcp_cleanup_ulp(sock);
-		write_lock_bh(&sock->sk_callback_lock);
-		smap_stop_sock(psock, sock);
-		write_unlock_bh(&sock->sk_callback_lock);
-		clear_bit(SMAP_TX_RUNNING, &psock->state);
-		rcu_assign_sk_user_data(sock, NULL);
-		call_rcu_sched(&psock->rcu, smap_destroy_psock);
-	}
-}
-
-static int smap_parse_func_strparser(struct strparser *strp,
-				       struct sk_buff *skb)
-{
-	struct smap_psock *psock;
-	struct bpf_prog *prog;
-	int rc;
-
-	rcu_read_lock();
-	psock = container_of(strp, struct smap_psock, strp);
-	prog = READ_ONCE(psock->bpf_parse);
-
-	if (unlikely(!prog)) {
-		rcu_read_unlock();
-		return skb->len;
-	}
-
-	/* Attach socket for bpf program to use if needed we can do this
-	 * because strparser clones the skb before handing it to a upper
-	 * layer, meaning skb_orphan has been called. We NULL sk on the
-	 * way out to ensure we don't trigger a BUG_ON in skb/sk operations
-	 * later and because we are not charging the memory of this skb to
-	 * any socket yet.
-	 */
-	skb->sk = psock->sock;
-	bpf_compute_data_end_sk_skb(skb);
-	rc = (*prog->bpf_func)(skb, prog->insnsi);
-	skb->sk = NULL;
-	rcu_read_unlock();
-	return rc;
-}
-
-static int smap_read_sock_done(struct strparser *strp, int err)
-{
-	return err;
-}
-
-static int smap_init_sock(struct smap_psock *psock,
-			  struct sock *sk)
-{
-	static const struct strp_callbacks cb = {
-		.rcv_msg = smap_read_sock_strparser,
-		.parse_msg = smap_parse_func_strparser,
-		.read_sock_done = smap_read_sock_done,
-	};
-
-	return strp_init(&psock->strp, sk, &cb);
-}
-
-static void smap_init_progs(struct smap_psock *psock,
-			    struct bpf_prog *verdict,
-			    struct bpf_prog *parse)
-{
-	struct bpf_prog *orig_parse, *orig_verdict;
-
-	orig_parse = xchg(&psock->bpf_parse, parse);
-	orig_verdict = xchg(&psock->bpf_verdict, verdict);
-
-	if (orig_verdict)
-		bpf_prog_put(orig_verdict);
-	if (orig_parse)
-		bpf_prog_put(orig_parse);
-}
-
-static void smap_start_sock(struct smap_psock *psock, struct sock *sk)
-{
-	if (sk->sk_data_ready == smap_data_ready)
-		return;
-	psock->save_data_ready = sk->sk_data_ready;
-	psock->save_write_space = sk->sk_write_space;
-	sk->sk_data_ready = smap_data_ready;
-	sk->sk_write_space = smap_write_space;
-	psock->strp_enabled = true;
-}
-
-static void sock_map_remove_complete(struct bpf_stab *stab)
-{
-	bpf_map_area_free(stab->sock_map);
-	kfree(stab);
-}
-
-static void smap_gc_work(struct work_struct *w)
-{
-	struct smap_psock_map_entry *e, *tmp;
-	struct sk_msg_buff *md, *mtmp;
-	struct smap_psock *psock;
-
-	psock = container_of(w, struct smap_psock, gc_work);
-
-	/* no callback lock needed because we already detached sockmap ops */
-	if (psock->strp_enabled)
-		strp_done(&psock->strp);
-
-	cancel_work_sync(&psock->tx_work);
-	__skb_queue_purge(&psock->rxqueue);
-
-	/* At this point all strparser and xmit work must be complete */
-	if (psock->bpf_parse)
-		bpf_prog_put(psock->bpf_parse);
-	if (psock->bpf_verdict)
-		bpf_prog_put(psock->bpf_verdict);
-	if (psock->bpf_tx_msg)
-		bpf_prog_put(psock->bpf_tx_msg);
-
-	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork, true);
-		kfree(psock->cork);
-	}
-
-	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
-		list_del(&md->list);
-		free_start_sg(psock->sock, md, true);
-		kfree(md);
-	}
-
-	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-		list_del(&e->list);
-		kfree(e);
-	}
-
-	if (psock->sk_redir)
-		sock_put(psock->sk_redir);
-
-	sock_put(psock->sock);
-	kfree(psock);
-}
-
-static struct smap_psock *smap_init_psock(struct sock *sock, int node)
-{
-	struct smap_psock *psock;
-
-	psock = kzalloc_node(sizeof(struct smap_psock),
-			     GFP_ATOMIC | __GFP_NOWARN,
-			     node);
-	if (!psock)
-		return ERR_PTR(-ENOMEM);
-
-	psock->eval =  __SK_NONE;
-	psock->sock = sock;
-	skb_queue_head_init(&psock->rxqueue);
-	INIT_WORK(&psock->tx_work, smap_tx_work);
-	INIT_WORK(&psock->gc_work, smap_gc_work);
-	INIT_LIST_HEAD(&psock->maps);
-	INIT_LIST_HEAD(&psock->ingress);
-	refcount_set(&psock->refcnt, 1);
-	spin_lock_init(&psock->maps_lock);
-
-	rcu_assign_sk_user_data(sock, psock);
-	sock_hold(sock);
-	return psock;
-}
-
-static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
-{
-	struct bpf_stab *stab;
-	u64 cost;
-	int err;
-
-	if (!capable(CAP_NET_ADMIN))
-		return ERR_PTR(-EPERM);
-
-	/* check sanity of attributes */
-	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
-		return ERR_PTR(-EINVAL);
-
-	err = bpf_tcp_ulp_register();
-	if (err && err != -EEXIST)
-		return ERR_PTR(err);
-
-	stab = kzalloc(sizeof(*stab), GFP_USER);
-	if (!stab)
-		return ERR_PTR(-ENOMEM);
-
-	bpf_map_init_from_attr(&stab->map, attr);
-	raw_spin_lock_init(&stab->lock);
-
-	/* make sure page count doesn't overflow */
-	cost = (u64) stab->map.max_entries * sizeof(struct sock *);
-	err = -EINVAL;
-	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_stab;
-
-	stab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-	/* if map size is larger than memlock limit, reject it early */
-	err = bpf_map_precharge_memlock(stab->map.pages);
-	if (err)
-		goto free_stab;
-
-	err = -ENOMEM;
-	stab->sock_map = bpf_map_area_alloc(stab->map.max_entries *
-					    sizeof(struct sock *),
-					    stab->map.numa_node);
-	if (!stab->sock_map)
-		goto free_stab;
-
-	return &stab->map;
-free_stab:
-	kfree(stab);
-	return ERR_PTR(err);
-}
-
-static void smap_list_map_remove(struct smap_psock *psock,
-				 struct sock **entry)
-{
-	struct smap_psock_map_entry *e, *tmp;
-
-	spin_lock_bh(&psock->maps_lock);
-	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-		if (e->entry == entry) {
-			list_del(&e->list);
-			kfree(e);
-		}
-	}
-	spin_unlock_bh(&psock->maps_lock);
-}
-
-static void smap_list_hash_remove(struct smap_psock *psock,
-				  struct htab_elem *hash_link)
-{
-	struct smap_psock_map_entry *e, *tmp;
-
-	spin_lock_bh(&psock->maps_lock);
-	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-		struct htab_elem *c = rcu_dereference(e->hash_link);
-
-		if (c == hash_link) {
-			list_del(&e->list);
-			kfree(e);
-		}
-	}
-	spin_unlock_bh(&psock->maps_lock);
-}
-
-static void sock_map_free(struct bpf_map *map)
-{
-	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-	int i;
-
-	synchronize_rcu();
-
-	/* At this point no update, lookup or delete operations can happen.
-	 * However, be aware we can still get a socket state event updates,
-	 * and data ready callabacks that reference the psock from sk_user_data
-	 * Also psock worker threads are still in-flight. So smap_release_sock
-	 * will only free the psock after cancel_sync on the worker threads
-	 * and a grace period expire to ensure psock is really safe to remove.
-	 */
-	rcu_read_lock();
-	raw_spin_lock_bh(&stab->lock);
-	for (i = 0; i < stab->map.max_entries; i++) {
-		struct smap_psock *psock;
-		struct sock *sock;
-
-		sock = stab->sock_map[i];
-		if (!sock)
-			continue;
-		stab->sock_map[i] = NULL;
-		psock = smap_psock_sk(sock);
-		/* This check handles a racing sock event that can get the
-		 * sk_callback_lock before this case but after xchg happens
-		 * causing the refcnt to hit zero and sock user data (psock)
-		 * to be null and queued for garbage collection.
-		 */
-		if (likely(psock)) {
-			smap_list_map_remove(psock, &stab->sock_map[i]);
-			smap_release_sock(psock, sock);
-		}
-	}
-	raw_spin_unlock_bh(&stab->lock);
-	rcu_read_unlock();
-
-	sock_map_remove_complete(stab);
-}
-
-static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
-{
-	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-	u32 i = key ? *(u32 *)key : U32_MAX;
-	u32 *next = (u32 *)next_key;
-
-	if (i >= stab->map.max_entries) {
-		*next = 0;
-		return 0;
-	}
-
-	if (i == stab->map.max_entries - 1)
-		return -ENOENT;
-
-	*next = i + 1;
-	return 0;
-}
-
-struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
-{
-	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-
-	if (key >= map->max_entries)
-		return NULL;
-
-	return READ_ONCE(stab->sock_map[key]);
-}
-
-static int sock_map_delete_elem(struct bpf_map *map, void *key)
-{
-	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-	struct smap_psock *psock;
-	int k = *(u32 *)key;
-	struct sock *sock;
-
-	if (k >= map->max_entries)
-		return -EINVAL;
-
-	raw_spin_lock_bh(&stab->lock);
-	sock = stab->sock_map[k];
-	stab->sock_map[k] = NULL;
-	raw_spin_unlock_bh(&stab->lock);
-	if (!sock)
-		return -EINVAL;
-
-	psock = smap_psock_sk(sock);
-	if (!psock)
-		return 0;
-	if (psock->bpf_parse) {
-		write_lock_bh(&sock->sk_callback_lock);
-		smap_stop_sock(psock, sock);
-		write_unlock_bh(&sock->sk_callback_lock);
-	}
-	smap_list_map_remove(psock, &stab->sock_map[k]);
-	smap_release_sock(psock, sock);
-	return 0;
-}
-
-/* Locking notes: Concurrent updates, deletes, and lookups are allowed and are
- * done inside rcu critical sections. This ensures on updates that the psock
- * will not be released via smap_release_sock() until concurrent updates/deletes
- * complete. All operations operate on sock_map using cmpxchg and xchg
- * operations to ensure we do not get stale references. Any reads into the
- * map must be done with READ_ONCE() because of this.
- *
- * A psock is destroyed via call_rcu and after any worker threads are cancelled
- * and syncd so we are certain all references from the update/lookup/delete
- * operations as well as references in the data path are no longer in use.
- *
- * Psocks may exist in multiple maps, but only a single set of parse/verdict
- * programs may be inherited from the maps it belongs to. A reference count
- * is kept with the total number of references to the psock from all maps. The
- * psock will not be released until this reaches zero. The psock and sock
- * user data data use the sk_callback_lock to protect critical data structures
- * from concurrent access. This allows us to avoid two updates from modifying
- * the user data in sock and the lock is required anyways for modifying
- * callbacks, we simply increase its scope slightly.
- *
- * Rules to follow,
- *  - psock must always be read inside RCU critical section
- *  - sk_user_data must only be modified inside sk_callback_lock and read
- *    inside RCU critical section.
- *  - psock->maps list must only be read & modified inside sk_callback_lock
- *  - sock_map must use READ_ONCE and (cmp)xchg operations
- *  - BPF verdict/parse programs must use READ_ONCE and xchg operations
- */
-
-static int __sock_map_ctx_update_elem(struct bpf_map *map,
-				      struct bpf_sock_progs *progs,
-				      struct sock *sock,
-				      void *key)
-{
-	struct bpf_prog *verdict, *parse, *tx_msg;
-	struct smap_psock *psock;
-	bool new = false;
-	int err = 0;
-
-	/* 1. If sock map has BPF programs those will be inherited by the
-	 * sock being added. If the sock is already attached to BPF programs
-	 * this results in an error.
-	 */
-	verdict = READ_ONCE(progs->bpf_verdict);
-	parse = READ_ONCE(progs->bpf_parse);
-	tx_msg = READ_ONCE(progs->bpf_tx_msg);
-
-	if (parse && verdict) {
-		/* bpf prog refcnt may be zero if a concurrent attach operation
-		 * removes the program after the above READ_ONCE() but before
-		 * we increment the refcnt. If this is the case abort with an
-		 * error.
-		 */
-		verdict = bpf_prog_inc_not_zero(verdict);
-		if (IS_ERR(verdict))
-			return PTR_ERR(verdict);
-
-		parse = bpf_prog_inc_not_zero(parse);
-		if (IS_ERR(parse)) {
-			bpf_prog_put(verdict);
-			return PTR_ERR(parse);
-		}
-	}
-
-	if (tx_msg) {
-		tx_msg = bpf_prog_inc_not_zero(tx_msg);
-		if (IS_ERR(tx_msg)) {
-			if (parse && verdict) {
-				bpf_prog_put(parse);
-				bpf_prog_put(verdict);
-			}
-			return PTR_ERR(tx_msg);
-		}
-	}
-
-	psock = smap_psock_sk(sock);
-
-	/* 2. Do not allow inheriting programs if psock exists and has
-	 * already inherited programs. This would create confusion on
-	 * which parser/verdict program is running. If no psock exists
-	 * create one. Inside sk_callback_lock to ensure concurrent create
-	 * doesn't update user data.
-	 */
-	if (psock) {
-		if (!psock_is_smap_sk(sock)) {
-			err = -EBUSY;
-			goto out_progs;
-		}
-		if (READ_ONCE(psock->bpf_parse) && parse) {
-			err = -EBUSY;
-			goto out_progs;
-		}
-		if (READ_ONCE(psock->bpf_tx_msg) && tx_msg) {
-			err = -EBUSY;
-			goto out_progs;
-		}
-		if (!refcount_inc_not_zero(&psock->refcnt)) {
-			err = -EAGAIN;
-			goto out_progs;
-		}
-	} else {
-		psock = smap_init_psock(sock, map->numa_node);
-		if (IS_ERR(psock)) {
-			err = PTR_ERR(psock);
-			goto out_progs;
-		}
-
-		set_bit(SMAP_TX_RUNNING, &psock->state);
-		new = true;
-	}
-
-	/* 3. At this point we have a reference to a valid psock that is
-	 * running. Attach any BPF programs needed.
-	 */
-	if (tx_msg)
-		bpf_tcp_msg_add(psock, sock, tx_msg);
-	if (new) {
-		err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
-		if (err)
-			goto out_free;
-	}
-
-	if (parse && verdict && !psock->strp_enabled) {
-		err = smap_init_sock(psock, sock);
-		if (err)
-			goto out_free;
-		smap_init_progs(psock, verdict, parse);
-		write_lock_bh(&sock->sk_callback_lock);
-		smap_start_sock(psock, sock);
-		write_unlock_bh(&sock->sk_callback_lock);
-	}
-
-	return err;
-out_free:
-	smap_release_sock(psock, sock);
-out_progs:
-	if (parse && verdict) {
-		bpf_prog_put(parse);
-		bpf_prog_put(verdict);
-	}
-	if (tx_msg)
-		bpf_prog_put(tx_msg);
-	return err;
-}
-
-static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
-				    struct bpf_map *map,
-				    void *key, u64 flags)
-{
-	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-	struct bpf_sock_progs *progs = &stab->progs;
-	struct sock *osock, *sock = skops->sk;
-	struct smap_psock_map_entry *e;
-	struct smap_psock *psock;
-	u32 i = *(u32 *)key;
-	int err;
-
-	if (unlikely(flags > BPF_EXIST))
-		return -EINVAL;
-	if (unlikely(i >= stab->map.max_entries))
-		return -E2BIG;
-
-	e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
-	if (!e)
-		return -ENOMEM;
-
-	err = __sock_map_ctx_update_elem(map, progs, sock, key);
-	if (err)
-		goto out;
-
-	/* psock guaranteed to be present. */
-	psock = smap_psock_sk(sock);
-	raw_spin_lock_bh(&stab->lock);
-	osock = stab->sock_map[i];
-	if (osock && flags == BPF_NOEXIST) {
-		err = -EEXIST;
-		goto out_unlock;
-	}
-	if (!osock && flags == BPF_EXIST) {
-		err = -ENOENT;
-		goto out_unlock;
-	}
-
-	e->entry = &stab->sock_map[i];
-	e->map = map;
-	spin_lock_bh(&psock->maps_lock);
-	list_add_tail(&e->list, &psock->maps);
-	spin_unlock_bh(&psock->maps_lock);
-
-	stab->sock_map[i] = sock;
-	if (osock) {
-		psock = smap_psock_sk(osock);
-		smap_list_map_remove(psock, &stab->sock_map[i]);
-		smap_release_sock(psock, osock);
-	}
-	raw_spin_unlock_bh(&stab->lock);
-	return 0;
-out_unlock:
-	smap_release_sock(psock, sock);
-	raw_spin_unlock_bh(&stab->lock);
-out:
-	kfree(e);
-	return err;
-}
-
-int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
-{
-	struct bpf_sock_progs *progs;
-	struct bpf_prog *orig;
-
-	if (map->map_type == BPF_MAP_TYPE_SOCKMAP) {
-		struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-
-		progs = &stab->progs;
-	} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH) {
-		struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-
-		progs = &htab->progs;
-	} else {
-		return -EINVAL;
-	}
-
-	switch (type) {
-	case BPF_SK_MSG_VERDICT:
-		orig = xchg(&progs->bpf_tx_msg, prog);
-		break;
-	case BPF_SK_SKB_STREAM_PARSER:
-		orig = xchg(&progs->bpf_parse, prog);
-		break;
-	case BPF_SK_SKB_STREAM_VERDICT:
-		orig = xchg(&progs->bpf_verdict, prog);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	if (orig)
-		bpf_prog_put(orig);
-
-	return 0;
-}
-
-int sockmap_get_from_fd(const union bpf_attr *attr, int type,
-			struct bpf_prog *prog)
-{
-	int ufd = attr->target_fd;
-	struct bpf_map *map;
-	struct fd f;
-	int err;
-
-	f = fdget(ufd);
-	map = __bpf_map_get(f);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
-
-	err = sock_map_prog(map, prog, attr->attach_type);
-	fdput(f);
-	return err;
-}
-
-static void *sock_map_lookup(struct bpf_map *map, void *key)
-{
-	return NULL;
-}
-
-static int sock_map_update_elem(struct bpf_map *map,
-				void *key, void *value, u64 flags)
-{
-	struct bpf_sock_ops_kern skops;
-	u32 fd = *(u32 *)value;
-	struct socket *socket;
-	int err;
-
-	socket = sockfd_lookup(fd, &err);
-	if (!socket)
-		return err;
-
-	skops.sk = socket->sk;
-	if (!skops.sk) {
-		fput(socket->file);
-		return -EINVAL;
-	}
-
-	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
-	 * state.
-	 */
-	if (skops.sk->sk_type != SOCK_STREAM ||
-	    skops.sk->sk_protocol != IPPROTO_TCP ||
-	    skops.sk->sk_state != TCP_ESTABLISHED) {
-		fput(socket->file);
-		return -EOPNOTSUPP;
-	}
-
-	lock_sock(skops.sk);
-	preempt_disable();
-	rcu_read_lock();
-	err = sock_map_ctx_update_elem(&skops, map, key, flags);
-	rcu_read_unlock();
-	preempt_enable();
-	release_sock(skops.sk);
-	fput(socket->file);
-	return err;
-}
-
-static void sock_map_release(struct bpf_map *map)
-{
-	struct bpf_sock_progs *progs;
-	struct bpf_prog *orig;
-
-	if (map->map_type == BPF_MAP_TYPE_SOCKMAP) {
-		struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-
-		progs = &stab->progs;
-	} else {
-		struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-
-		progs = &htab->progs;
-	}
-
-	orig = xchg(&progs->bpf_parse, NULL);
-	if (orig)
-		bpf_prog_put(orig);
-	orig = xchg(&progs->bpf_verdict, NULL);
-	if (orig)
-		bpf_prog_put(orig);
-
-	orig = xchg(&progs->bpf_tx_msg, NULL);
-	if (orig)
-		bpf_prog_put(orig);
-}
-
-static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
-{
-	struct bpf_htab *htab;
-	int i, err;
-	u64 cost;
-
-	if (!capable(CAP_NET_ADMIN))
-		return ERR_PTR(-EPERM);
-
-	/* check sanity of attributes */
-	if (attr->max_entries == 0 ||
-	    attr->key_size == 0 ||
-	    attr->value_size != 4 ||
-	    attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
-		return ERR_PTR(-EINVAL);
-
-	if (attr->key_size > MAX_BPF_STACK)
-		/* eBPF programs initialize keys on stack, so they cannot be
-		 * larger than max stack size
-		 */
-		return ERR_PTR(-E2BIG);
-
-	err = bpf_tcp_ulp_register();
-	if (err && err != -EEXIST)
-		return ERR_PTR(err);
-
-	htab = kzalloc(sizeof(*htab), GFP_USER);
-	if (!htab)
-		return ERR_PTR(-ENOMEM);
-
-	bpf_map_init_from_attr(&htab->map, attr);
-
-	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
-	htab->elem_size = sizeof(struct htab_elem) +
-			  round_up(htab->map.key_size, 8);
-	err = -EINVAL;
-	if (htab->n_buckets == 0 ||
-	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
-		goto free_htab;
-
-	cost = (u64) htab->n_buckets * sizeof(struct bucket) +
-	       (u64) htab->elem_size * htab->map.max_entries;
-
-	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_htab;
-
-	htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-	err = bpf_map_precharge_memlock(htab->map.pages);
-	if (err)
-		goto free_htab;
-
-	err = -ENOMEM;
-	htab->buckets = bpf_map_area_alloc(
-				htab->n_buckets * sizeof(struct bucket),
-				htab->map.numa_node);
-	if (!htab->buckets)
-		goto free_htab;
-
-	for (i = 0; i < htab->n_buckets; i++) {
-		INIT_HLIST_HEAD(&htab->buckets[i].head);
-		raw_spin_lock_init(&htab->buckets[i].lock);
-	}
-
-	return &htab->map;
-free_htab:
-	kfree(htab);
-	return ERR_PTR(err);
-}
-
-static void __bpf_htab_free(struct rcu_head *rcu)
-{
-	struct bpf_htab *htab;
-
-	htab = container_of(rcu, struct bpf_htab, rcu);
-	bpf_map_area_free(htab->buckets);
-	kfree(htab);
-}
-
-static void sock_hash_free(struct bpf_map *map)
-{
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	int i;
-
-	synchronize_rcu();
-
-	/* At this point no update, lookup or delete operations can happen.
-	 * However, be aware we can still get a socket state event updates,
-	 * and data ready callabacks that reference the psock from sk_user_data
-	 * Also psock worker threads are still in-flight. So smap_release_sock
-	 * will only free the psock after cancel_sync on the worker threads
-	 * and a grace period expire to ensure psock is really safe to remove.
-	 */
-	rcu_read_lock();
-	for (i = 0; i < htab->n_buckets; i++) {
-		struct bucket *b = __select_bucket(htab, i);
-		struct hlist_head *head;
-		struct hlist_node *n;
-		struct htab_elem *l;
-
-		raw_spin_lock_bh(&b->lock);
-		head = &b->head;
-		hlist_for_each_entry_safe(l, n, head, hash_node) {
-			struct sock *sock = l->sk;
-			struct smap_psock *psock;
-
-			hlist_del_rcu(&l->hash_node);
-			psock = smap_psock_sk(sock);
-			/* This check handles a racing sock event that can get
-			 * the sk_callback_lock before this case but after xchg
-			 * causing the refcnt to hit zero and sock user data
-			 * (psock) to be null and queued for garbage collection.
-			 */
-			if (likely(psock)) {
-				smap_list_hash_remove(psock, l);
-				smap_release_sock(psock, sock);
-			}
-			free_htab_elem(htab, l);
-		}
-		raw_spin_unlock_bh(&b->lock);
-	}
-	rcu_read_unlock();
-	call_rcu(&htab->rcu, __bpf_htab_free);
-}
-
-static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
-					      void *key, u32 key_size, u32 hash,
-					      struct sock *sk,
-					      struct htab_elem *old_elem)
-{
-	struct htab_elem *l_new;
-
-	if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
-		if (!old_elem) {
-			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
-		}
-	}
-	l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
-			     htab->map.numa_node);
-	if (!l_new) {
-		atomic_dec(&htab->count);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	memcpy(l_new->key, key, key_size);
-	l_new->sk = sk;
-	l_new->hash = hash;
-	return l_new;
-}
-
-static inline u32 htab_map_hash(const void *key, u32 key_len)
-{
-	return jhash(key, key_len, 0);
-}
-
-static int sock_hash_get_next_key(struct bpf_map *map,
-				  void *key, void *next_key)
-{
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct htab_elem *l, *next_l;
-	struct hlist_head *h;
-	u32 hash, key_size;
-	int i = 0;
-
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	key_size = map->key_size;
-	if (!key)
-		goto find_first_elem;
-	hash = htab_map_hash(key, key_size);
-	h = select_bucket(htab, hash);
-
-	l = lookup_elem_raw(h, hash, key, key_size);
-	if (!l)
-		goto find_first_elem;
-	next_l = hlist_entry_safe(
-		     rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
-		     struct htab_elem, hash_node);
-	if (next_l) {
-		memcpy(next_key, next_l->key, key_size);
-		return 0;
-	}
-
-	/* no more elements in this hash list, go to the next bucket */
-	i = hash & (htab->n_buckets - 1);
-	i++;
-
-find_first_elem:
-	/* iterate over buckets */
-	for (; i < htab->n_buckets; i++) {
-		h = select_bucket(htab, i);
-
-		/* pick first element in the bucket */
-		next_l = hlist_entry_safe(
-				rcu_dereference_raw(hlist_first_rcu(h)),
-				struct htab_elem, hash_node);
-		if (next_l) {
-			/* if it's not empty, just return it */
-			memcpy(next_key, next_l->key, key_size);
-			return 0;
-		}
-	}
-
-	/* iterated over all buckets and all elements */
-	return -ENOENT;
-}
-
-static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
-				     struct bpf_map *map,
-				     void *key, u64 map_flags)
-{
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct bpf_sock_progs *progs = &htab->progs;
-	struct htab_elem *l_new = NULL, *l_old;
-	struct smap_psock_map_entry *e = NULL;
-	struct hlist_head *head;
-	struct smap_psock *psock;
-	u32 key_size, hash;
-	struct sock *sock;
-	struct bucket *b;
-	int err;
-
-	sock = skops->sk;
-
-	if (sock->sk_type != SOCK_STREAM ||
-	    sock->sk_protocol != IPPROTO_TCP)
-		return -EOPNOTSUPP;
-
-	if (unlikely(map_flags > BPF_EXIST))
-		return -EINVAL;
-
-	e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
-	if (!e)
-		return -ENOMEM;
-
-	WARN_ON_ONCE(!rcu_read_lock_held());
-	key_size = map->key_size;
-	hash = htab_map_hash(key, key_size);
-	b = __select_bucket(htab, hash);
-	head = &b->head;
-
-	err = __sock_map_ctx_update_elem(map, progs, sock, key);
-	if (err)
-		goto err;
-
-	/* psock is valid here because otherwise above *ctx_update_elem would
-	 * have thrown an error. It is safe to skip error check.
-	 */
-	psock = smap_psock_sk(sock);
-	raw_spin_lock_bh(&b->lock);
-	l_old = lookup_elem_raw(head, hash, key, key_size);
-	if (l_old && map_flags == BPF_NOEXIST) {
-		err = -EEXIST;
-		goto bucket_err;
-	}
-	if (!l_old && map_flags == BPF_EXIST) {
-		err = -ENOENT;
-		goto bucket_err;
-	}
-
-	l_new = alloc_sock_hash_elem(htab, key, key_size, hash, sock, l_old);
-	if (IS_ERR(l_new)) {
-		err = PTR_ERR(l_new);
-		goto bucket_err;
-	}
-
-	rcu_assign_pointer(e->hash_link, l_new);
-	e->map = map;
-	spin_lock_bh(&psock->maps_lock);
-	list_add_tail(&e->list, &psock->maps);
-	spin_unlock_bh(&psock->maps_lock);
-
-	/* add new element to the head of the list, so that
-	 * concurrent search will find it before old elem
-	 */
-	hlist_add_head_rcu(&l_new->hash_node, head);
-	if (l_old) {
-		psock = smap_psock_sk(l_old->sk);
-
-		hlist_del_rcu(&l_old->hash_node);
-		smap_list_hash_remove(psock, l_old);
-		smap_release_sock(psock, l_old->sk);
-		free_htab_elem(htab, l_old);
-	}
-	raw_spin_unlock_bh(&b->lock);
-	return 0;
-bucket_err:
-	smap_release_sock(psock, sock);
-	raw_spin_unlock_bh(&b->lock);
-err:
-	kfree(e);
-	return err;
-}
-
-static int sock_hash_update_elem(struct bpf_map *map,
-				void *key, void *value, u64 flags)
-{
-	struct bpf_sock_ops_kern skops;
-	u32 fd = *(u32 *)value;
-	struct socket *socket;
-	int err;
-
-	socket = sockfd_lookup(fd, &err);
-	if (!socket)
-		return err;
-
-	skops.sk = socket->sk;
-	if (!skops.sk) {
-		fput(socket->file);
-		return -EINVAL;
-	}
-
-	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
-	 * state.
-	 */
-	if (skops.sk->sk_type != SOCK_STREAM ||
-	    skops.sk->sk_protocol != IPPROTO_TCP ||
-	    skops.sk->sk_state != TCP_ESTABLISHED) {
-		fput(socket->file);
-		return -EOPNOTSUPP;
-	}
-
-	lock_sock(skops.sk);
-	preempt_disable();
-	rcu_read_lock();
-	err = sock_hash_ctx_update_elem(&skops, map, key, flags);
-	rcu_read_unlock();
-	preempt_enable();
-	release_sock(skops.sk);
-	fput(socket->file);
-	return err;
-}
-
-static int sock_hash_delete_elem(struct bpf_map *map, void *key)
-{
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
-	struct bucket *b;
-	struct htab_elem *l;
-	u32 hash, key_size;
-	int ret = -ENOENT;
-
-	key_size = map->key_size;
-	hash = htab_map_hash(key, key_size);
-	b = __select_bucket(htab, hash);
-	head = &b->head;
-
-	raw_spin_lock_bh(&b->lock);
-	l = lookup_elem_raw(head, hash, key, key_size);
-	if (l) {
-		struct sock *sock = l->sk;
-		struct smap_psock *psock;
-
-		hlist_del_rcu(&l->hash_node);
-		psock = smap_psock_sk(sock);
-		/* This check handles a racing sock event that can get the
-		 * sk_callback_lock before this case but after xchg happens
-		 * causing the refcnt to hit zero and sock user data (psock)
-		 * to be null and queued for garbage collection.
-		 */
-		if (likely(psock)) {
-			smap_list_hash_remove(psock, l);
-			smap_release_sock(psock, sock);
-		}
-		free_htab_elem(htab, l);
-		ret = 0;
-	}
-	raw_spin_unlock_bh(&b->lock);
-	return ret;
-}
-
-struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
-{
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
-	struct htab_elem *l;
-	u32 key_size, hash;
-	struct bucket *b;
-	struct sock *sk;
-
-	key_size = map->key_size;
-	hash = htab_map_hash(key, key_size);
-	b = __select_bucket(htab, hash);
-	head = &b->head;
-
-	l = lookup_elem_raw(head, hash, key, key_size);
-	sk = l ? l->sk : NULL;
-	return sk;
-}
-
-const struct bpf_map_ops sock_map_ops = {
-	.map_alloc = sock_map_alloc,
-	.map_free = sock_map_free,
-	.map_lookup_elem = sock_map_lookup,
-	.map_get_next_key = sock_map_get_next_key,
-	.map_update_elem = sock_map_update_elem,
-	.map_delete_elem = sock_map_delete_elem,
-	.map_release_uref = sock_map_release,
-	.map_check_btf = map_check_no_btf,
-};
-
-const struct bpf_map_ops sock_hash_ops = {
-	.map_alloc = sock_hash_alloc,
-	.map_free = sock_hash_free,
-	.map_lookup_elem = sock_map_lookup,
-	.map_get_next_key = sock_hash_get_next_key,
-	.map_update_elem = sock_hash_update_elem,
-	.map_delete_elem = sock_hash_delete_elem,
-	.map_release_uref = sock_map_release,
-	.map_check_btf = map_check_no_btf,
-};
-
-static bool bpf_is_valid_sock_op(struct bpf_sock_ops_kern *ops)
-{
-	return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
-	       ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
-}
-BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
-	 * state. This checks that the sock ops triggering the update is
-	 * one indicating we are (or will be soon) in an ESTABLISHED state.
-	 */
-	if (!bpf_is_valid_sock_op(bpf_sock))
-		return -EOPNOTSUPP;
-	return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
-}
-
-const struct bpf_func_proto bpf_sock_map_update_proto = {
-	.func		= bpf_sock_map_update,
-	.gpl_only	= false,
-	.pkt_access	= true,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_CONST_MAP_PTR,
-	.arg3_type	= ARG_PTR_TO_MAP_KEY,
-	.arg4_type	= ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	if (!bpf_is_valid_sock_op(bpf_sock))
-		return -EOPNOTSUPP;
-	return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
-}
-
-const struct bpf_func_proto bpf_sock_hash_update_proto = {
-	.func		= bpf_sock_hash_update,
-	.gpl_only	= false,
-	.pkt_access	= true,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_CONST_MAP_PTR,
-	.arg3_type	= ARG_PTR_TO_MAP_KEY,
-	.arg4_type	= ARG_ANYTHING,
-};
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 8061a439ef18..b2ade10f7ec3 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -505,7 +505,7 @@ const struct bpf_func_proto bpf_get_stack_proto = {
 /* Called from eBPF program */
 static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 {
-	return NULL;
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 /* Called from syscall */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5742df21598c..f4ecd6ed2252 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -719,10 +719,15 @@ static int map_lookup_elem(union bpf_attr *attr)
 	} else {
 		rcu_read_lock();
 		ptr = map->ops->map_lookup_elem(map, key);
-		if (ptr)
+		if (IS_ERR(ptr)) {
+			err = PTR_ERR(ptr);
+		} else if (!ptr) {
+			err = -ENOENT;
+		} else {
+			err = 0;
 			memcpy(value, ptr, value_size);
+		}
 		rcu_read_unlock();
-		err = ptr ? 0 : -ENOENT;
 	}
 
 	if (err)
@@ -743,6 +748,17 @@ err_put:
 	return err;
 }
 
+static void maybe_wait_bpf_programs(struct bpf_map *map)
+{
+	/* Wait for any running BPF programs to complete so that
+	 * userspace, when we return to it, knows that all programs
+	 * that could be running use the new map value.
+	 */
+	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
+	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+		synchronize_rcu();
+}
+
 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
 
 static int map_update_elem(union bpf_attr *attr)
@@ -837,6 +853,7 @@ static int map_update_elem(union bpf_attr *attr)
 	}
 	__this_cpu_dec(bpf_prog_active);
 	preempt_enable();
+	maybe_wait_bpf_programs(map);
 out:
 free_value:
 	kfree(value);
@@ -889,6 +906,7 @@ static int map_delete_elem(union bpf_attr *attr)
 	rcu_read_unlock();
 	__this_cpu_dec(bpf_prog_active);
 	preempt_enable();
+	maybe_wait_bpf_programs(map);
 out:
 	kfree(key);
 err_put:
@@ -1646,7 +1664,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	switch (ptype) {
 	case BPF_PROG_TYPE_SK_SKB:
 	case BPF_PROG_TYPE_SK_MSG:
-		ret = sockmap_get_from_fd(attr, ptype, prog);
+		ret = sock_map_get_from_fd(attr, prog);
 		break;
 	case BPF_PROG_TYPE_LIRC_MODE2:
 		ret = lirc_prog_attach(attr, prog);
@@ -1700,10 +1718,10 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
 	case BPF_SK_MSG_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
+		return sock_map_get_from_fd(attr, NULL);
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
+		return sock_map_get_from_fd(attr, NULL);
 	case BPF_LIRC_MODE2:
 		return lirc_prog_detach(attr);
 	case BPF_FLOW_DISSECTOR:
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 9f8463afda9c..ef0b7b6ef8a5 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -154,7 +154,7 @@ void __xsk_map_flush(struct bpf_map *map)
 
 static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
 {
-	return NULL;
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
diff --git a/net/Kconfig b/net/Kconfig
index 228dfa382eec..f235edb593ba 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -300,8 +300,11 @@ config BPF_JIT
 
 config BPF_STREAM_PARSER
 	bool "enable BPF STREAM_PARSER"
+	depends on INET
 	depends on BPF_SYSCALL
+	depends on CGROUP_BPF
 	select STREAM_PARSER
+	select NET_SOCK_MSG
 	---help---
 	 Enabling this allows a stream parser to be used with
 	 BPF_MAP_TYPE_SOCKMAP.
@@ -413,6 +416,14 @@ config GRO_CELLS
 config SOCK_VALIDATE_XMIT
 	bool
 
+config NET_SOCK_MSG
+	bool
+	default n
+	help
+	  NET_SOCK_MSG provides a framework for plain sockets (e.g. TCP) or
+	  ULPs (upper layer protocols, e.g. TLS) to process L7 application data
+	  with the help of BPF programs.
+
 config NET_DEVLINK
 	tristate "Network physical/parent device Netlink interface"
 	help
diff --git a/net/core/Makefile b/net/core/Makefile
index 80175e6a2eb8..fccd31e0e7f7 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -16,6 +16,7 @@ obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 obj-y += net-sysfs.o
 obj-$(CONFIG_PAGE_POOL) += page_pool.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
+obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
@@ -27,6 +28,7 @@ obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
 obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
+obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
 obj-$(CONFIG_DST_CACHE) += dst_cache.o
 obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 8497feea8fb5..022ad73d6253 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4291,6 +4291,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	struct netdev_rx_queue *rxqueue;
 	void *orig_data, *orig_data_end;
 	u32 metalen, act = XDP_DROP;
+	__be16 orig_eth_type;
+	struct ethhdr *eth;
+	bool orig_bcast;
 	int hlen, off;
 	u32 mac_len;
 
@@ -4331,6 +4334,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	xdp->data_hard_start = skb->data - skb_headroom(skb);
 	orig_data_end = xdp->data_end;
 	orig_data = xdp->data;
+	eth = (struct ethhdr *)xdp->data;
+	orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
+	orig_eth_type = eth->h_proto;
 
 	rxqueue = netif_get_rxqueue(skb);
 	xdp->rxq = &rxqueue->xdp_rxq;
@@ -4354,6 +4360,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 
 	}
 
+	/* check if XDP changed eth hdr such that SKB needs update */
+	eth = (struct ethhdr *)xdp->data;
+	if ((orig_eth_type != eth->h_proto) ||
+	    (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
+		__skb_push(skb, ETH_HLEN);
+		skb->protocol = eth_type_trans(skb, skb->dev);
+	}
+
 	switch (act) {
 	case XDP_REDIRECT:
 	case XDP_TX:
diff --git a/net/core/filter.c b/net/core/filter.c
index 80da21b097b8..1a3ac6c46873 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -38,6 +38,7 @@
 #include <net/protocol.h>
 #include <net/netlink.h>
 #include <linux/skbuff.h>
+#include <linux/skmsg.h>
 #include <net/sock.h>
 #include <net/flow_dissector.h>
 #include <linux/errno.h>
@@ -2142,123 +2143,7 @@ static const struct bpf_func_proto bpf_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key);
-	if (!tcb->bpf.sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-static const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
-	.func           = bpf_sk_redirect_hash,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_PTR_TO_MAP_KEY,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
-	   struct bpf_map *, map, u32, key, u64, flags)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
-	if (!tcb->bpf.sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-struct sock *do_sk_redirect_map(struct sk_buff *skb)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	return tcb->bpf.sk_redir;
-}
-
-static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
-	.func           = bpf_sk_redirect_map,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_ANYTHING,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg_buff *, msg,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	msg->flags = flags;
-	msg->sk_redir = __sock_hash_lookup_elem(map, key);
-	if (!msg->sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-static const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
-	.func           = bpf_msg_redirect_hash,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_PTR_TO_MAP_KEY,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
-	   struct bpf_map *, map, u32, key, u64, flags)
-{
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	msg->flags = flags;
-	msg->sk_redir = __sock_map_lookup_elem(map, key);
-	if (!msg->sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
-{
-	return msg->sk_redir;
-}
-
-static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
-	.func           = bpf_msg_redirect_map,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_ANYTHING,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
 {
 	msg->apply_bytes = bytes;
 	return 0;
@@ -2272,7 +2157,7 @@ static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
 {
 	msg->cork_bytes = bytes;
 	return 0;
@@ -2286,45 +2171,37 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-#define sk_msg_iter_var(var)			\
-	do {					\
-		var++;				\
-		if (var == MAX_SKB_FRAGS)	\
-			var = 0;		\
-	} while (0)
-
-BPF_CALL_4(bpf_msg_pull_data,
-	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
+	   u32, end, u64, flags)
 {
-	unsigned int len = 0, offset = 0, copy = 0, poffset = 0;
-	int bytes = end - start, bytes_sg_total;
-	struct scatterlist *sg = msg->sg_data;
-	int first_sg, last_sg, i, shift;
-	unsigned char *p, *to, *from;
+	u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
+	u32 first_sge, last_sge, i, shift, bytes_sg_total;
+	struct scatterlist *sge;
+	u8 *raw, *to, *from;
 	struct page *page;
 
 	if (unlikely(flags || end <= start))
 		return -EINVAL;
 
 	/* First find the starting scatterlist element */
-	i = msg->sg_start;
+	i = msg->sg.start;
 	do {
-		len = sg[i].length;
+		len = sk_msg_elem(msg, i)->length;
 		if (start < offset + len)
 			break;
 		offset += len;
-		sk_msg_iter_var(i);
-	} while (i != msg->sg_end);
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
 
 	if (unlikely(start >= offset + len))
 		return -EINVAL;
 
-	first_sg = i;
+	first_sge = i;
 	/* The start may point into the sg element so we need to also
 	 * account for the headroom.
 	 */
 	bytes_sg_total = start - offset + bytes;
-	if (!msg->sg_copy[i] && bytes_sg_total <= len)
+	if (!msg->sg.copy[i] && bytes_sg_total <= len)
 		goto out;
 
 	/* At this point we need to linearize multiple scatterlist
@@ -2338,12 +2215,12 @@ BPF_CALL_4(bpf_msg_pull_data,
 	 * will copy the entire sg entry.
 	 */
 	do {
-		copy += sg[i].length;
-		sk_msg_iter_var(i);
+		copy += sk_msg_elem(msg, i)->length;
+		sk_msg_iter_var_next(i);
 		if (bytes_sg_total <= copy)
 			break;
-	} while (i != msg->sg_end);
-	last_sg = i;
+	} while (i != msg->sg.end);
+	last_sge = i;
 
 	if (unlikely(bytes_sg_total > copy))
 		return -EINVAL;
@@ -2352,63 +2229,61 @@ BPF_CALL_4(bpf_msg_pull_data,
 			   get_order(copy));
 	if (unlikely(!page))
 		return -ENOMEM;
-	p = page_address(page);
 
-	i = first_sg;
+	raw = page_address(page);
+	i = first_sge;
 	do {
-		from = sg_virt(&sg[i]);
-		len = sg[i].length;
-		to = p + poffset;
+		sge = sk_msg_elem(msg, i);
+		from = sg_virt(sge);
+		len = sge->length;
+		to = raw + poffset;
 
 		memcpy(to, from, len);
 		poffset += len;
-		sg[i].length = 0;
-		put_page(sg_page(&sg[i]));
+		sge->length = 0;
+		put_page(sg_page(sge));
 
-		sk_msg_iter_var(i);
-	} while (i != last_sg);
+		sk_msg_iter_var_next(i);
+	} while (i != last_sge);
 
-	sg[first_sg].length = copy;
-	sg_set_page(&sg[first_sg], page, copy, 0);
+	sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
 
 	/* To repair sg ring we need to shift entries. If we only
 	 * had a single entry though we can just replace it and
 	 * be done. Otherwise walk the ring and shift the entries.
 	 */
-	WARN_ON_ONCE(last_sg == first_sg);
-	shift = last_sg > first_sg ?
-		last_sg - first_sg - 1 :
-		MAX_SKB_FRAGS - first_sg + last_sg - 1;
+	WARN_ON_ONCE(last_sge == first_sge);
+	shift = last_sge > first_sge ?
+		last_sge - first_sge - 1 :
+		MAX_SKB_FRAGS - first_sge + last_sge - 1;
 	if (!shift)
 		goto out;
 
-	i = first_sg;
-	sk_msg_iter_var(i);
+	i = first_sge;
+	sk_msg_iter_var_next(i);
 	do {
-		int move_from;
+		u32 move_from;
 
-		if (i + shift >= MAX_SKB_FRAGS)
-			move_from = i + shift - MAX_SKB_FRAGS;
+		if (i + shift >= MAX_MSG_FRAGS)
+			move_from = i + shift - MAX_MSG_FRAGS;
 		else
 			move_from = i + shift;
-
-		if (move_from == msg->sg_end)
+		if (move_from == msg->sg.end)
 			break;
 
-		sg[i] = sg[move_from];
-		sg[move_from].length = 0;
-		sg[move_from].page_link = 0;
-		sg[move_from].offset = 0;
-
-		sk_msg_iter_var(i);
+		msg->sg.data[i] = msg->sg.data[move_from];
+		msg->sg.data[move_from].length = 0;
+		msg->sg.data[move_from].page_link = 0;
+		msg->sg.data[move_from].offset = 0;
+		sk_msg_iter_var_next(i);
 	} while (1);
-	msg->sg_end -= shift;
-	if (msg->sg_end < 0)
-		msg->sg_end += MAX_SKB_FRAGS;
+
+	msg->sg.end = msg->sg.end - shift > msg->sg.end ?
+		      msg->sg.end - shift + MAX_MSG_FRAGS :
+		      msg->sg.end - shift;
 out:
-	msg->data = sg_virt(&sg[first_sg]) + start - offset;
+	msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
 	msg->data_end = msg->data + bytes;
-
 	return 0;
 }
 
@@ -4821,9 +4696,12 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 			      struct sk_buff *skb, u8 family, u8 proto)
 {
-	int dif = skb->dev->ifindex;
 	bool refcounted = false;
 	struct sock *sk = NULL;
+	int dif = 0;
+
+	if (skb->dev)
+		dif = skb->dev->ifindex;
 
 	if (family == AF_INET) {
 		__be32 src4 = tuple->ipv4.saddr;
@@ -4839,21 +4717,24 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 			sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
 					       dst4, tuple->ipv4.dport,
 					       dif, sdif, &udp_table, skb);
-#if IS_REACHABLE(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	} else {
 		struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
 		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
+		u16 hnum = ntohs(tuple->ipv6.dport);
 		int sdif = inet6_sdif(skb);
 
 		if (proto == IPPROTO_TCP)
 			sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
 					    src6, tuple->ipv6.sport,
-					    dst6, tuple->ipv6.dport,
+					    dst6, hnum,
 					    dif, sdif, &refcounted);
-		else
-			sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport,
-					       dst6, tuple->ipv6.dport,
-					       dif, sdif, &udp_table, skb);
+		else if (likely(ipv6_bpf_stub))
+			sk = ipv6_bpf_stub->udp6_lib_lookup(net,
+							    src6, tuple->ipv6.sport,
+							    dst6, hnum,
+							    dif, sdif,
+							    &udp_table, skb);
 #endif
 	}
 
@@ -5200,6 +5081,9 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+const struct bpf_func_proto bpf_sock_map_update_proto __weak;
+const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
+
 static const struct bpf_func_proto *
 sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5223,6 +5107,9 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
+const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
+
 static const struct bpf_func_proto *
 sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5244,6 +5131,9 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
+const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
+
 static const struct bpf_func_proto *
 sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -6998,22 +6888,22 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 
 	switch (si->off) {
 	case offsetof(struct sk_msg_md, data):
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, data));
+				      offsetof(struct sk_msg, data));
 		break;
 	case offsetof(struct sk_msg_md, data_end):
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, data_end));
+				      offsetof(struct sk_msg, data_end));
 		break;
 	case offsetof(struct sk_msg_md, family):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_family));
 		break;
@@ -7022,9 +6912,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_daddr));
 		break;
@@ -7034,9 +6924,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 					  skc_rcv_saddr) != 4);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_rcv_saddr));
@@ -7051,9 +6941,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		off = si->off;
 		off -= offsetof(struct sk_msg_md, remote_ip6[0]);
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_v6_daddr.s6_addr32[0]) +
@@ -7072,9 +6962,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		off = si->off;
 		off -= offsetof(struct sk_msg_md, local_ip6[0]);
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_v6_rcv_saddr.s6_addr32[0]) +
@@ -7088,9 +6978,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_dport));
 #ifndef __BIG_ENDIAN_BITFIELD
@@ -7102,9 +6992,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_num));
 		break;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
new file mode 100644
index 000000000000..56a99d0c9aa0
--- /dev/null
+++ b/net/core/skmsg.c
@@ -0,0 +1,802 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#include <linux/skmsg.h>
+#include <linux/skbuff.h>
+#include <linux/scatterlist.h>
+
+#include <net/sock.h>
+#include <net/tcp.h>
+
+/* Test @elem_first_coalesce against the ring bounds sg.start/sg.end. */
+static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
+{
+	/* end ahead of start: the index only has to lie below end. */
+	if (msg->sg.end > msg->sg.start)
+		return elem_first_coalesce < msg->sg.end;
+
+	/* end behind start: the index may lie above start or below end. */
+	if (msg->sg.end < msg->sg.start)
+		return elem_first_coalesce > msg->sg.start ||
+		       elem_first_coalesce < msg->sg.end;
+	return false;
+}
+
+/* Extend @msg to @len bytes from the socket page frag. Returns 0, -ENOMEM
+ * if frag refill or wmem scheduling fails, -ENOSPC if the sg ring is full.
+ */
+int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
+		 int elem_first_coalesce)
+{
+	struct page_frag *pfrag = sk_page_frag(sk);
+
+	len -= msg->sg.size;
+	while (len > 0) {
+		struct scatterlist *tail;
+		u32 frag_off;
+		int chunk, prev;
+
+		if (!sk_page_frag_refill(sk, pfrag))
+			return -ENOMEM;
+
+		frag_off = pfrag->offset;
+		chunk = min_t(int, len, pfrag->size - frag_off);
+		if (!sk_wmem_schedule(sk, chunk))
+			return -ENOMEM;
+
+		prev = msg->sg.end;
+		sk_msg_iter_var_prev(prev);
+		tail = &msg->sg.data[prev];
+
+		if (sk_msg_try_coalesce_ok(msg, elem_first_coalesce) &&
+		    sg_page(tail) == pfrag->page &&
+		    tail->offset + tail->length == frag_off) {
+			tail->length += chunk;
+		} else {
+			if (sk_msg_full(msg))
+				return -ENOSPC;
+
+			tail = &msg->sg.data[msg->sg.end];
+			sg_unmark_end(tail);
+			sg_set_page(tail, pfrag->page, chunk, frag_off);
+			get_page(pfrag->page);
+			sk_msg_iter_next(msg, end);
+		}
+
+		sk_mem_charge(sk, chunk);
+		msg->sg.size += chunk;
+		pfrag->offset += chunk;
+		len -= chunk;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sk_msg_alloc);
+
+/* Add the pages of @src covering @len bytes from byte offset @off to @dst.
+ * Returns 0, or -ENOSPC if @dst is already full or @src ends too early.
+ */
+int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
+		 u32 off, u32 len)
+{
+	int idx = src->sg.start;
+	struct scatterlist *cur = sk_msg_elem(src, idx);
+
+	if (sk_msg_full(dst))
+		return -ENOSPC;
+
+	while (off && cur->length <= off) {
+		off -= cur->length;
+		sk_msg_iter_var_next(idx);
+		if (idx == src->sg.end && off)
+			return -ENOSPC;
+		cur = sk_msg_elem(src, idx);
+	}
+
+	while (len) {
+		u32 part_off = cur->offset + off, part_len = cur->length - off;
+
+		if (part_len > len)
+			part_len = len;
+		off = 0;
+		len -= part_len;
+		sk_msg_page_add(dst, sg_page(cur), part_len, part_off);
+		sk_mem_charge(sk, part_len);
+		sk_msg_iter_var_next(idx);
+		if (idx == src->sg.end && len)
+			return -ENOSPC;
+		cur = sk_msg_elem(src, idx);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sk_msg_clone);
+
+/* Uncharge @bytes from @sk, consuming @msg entries from sg.start onward. */
+void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes)
+{
+	struct scatterlist *elem;
+	int idx = msg->sg.start;
+
+	do {
+		elem = sk_msg_elem(msg, idx);
+		if (bytes < elem->length) {
+			sk_mem_uncharge(sk, bytes);
+			elem->length -= bytes;
+			elem->offset += bytes;
+			break;
+		}
+		sk_mem_uncharge(sk, elem->length);
+		bytes -= elem->length;
+		elem->length = 0;
+		elem->offset = 0;
+		sk_msg_iter_var_next(idx);
+	} while (bytes && idx != msg->sg.end);
+	msg->sg.start = idx;
+}
+EXPORT_SYMBOL_GPL(sk_msg_return_zero);
+
+/* Uncharge up to @bytes from @sk, walking @msg without modifying it. */
+void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes)
+{
+	int idx = msg->sg.start, amount;
+
+	do {
+		amount = (bytes < msg->sg.data[idx].length) ?
+			 bytes : msg->sg.data[idx].length;
+		sk_mem_uncharge(sk, amount);
+		bytes -= amount;
+		sk_msg_iter_var_next(idx);
+	} while (idx != msg->sg.end);
+}
+EXPORT_SYMBOL_GPL(sk_msg_return);
+
+static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
+			    bool charge)
+{
+	struct scatterlist *sge = sk_msg_elem(msg, i);
+	u32 len = sge->length;
+
+	if (charge)
+		sk_mem_uncharge(sk, len);
+	if (!msg->skb)		/* page ref is only dropped without an skb */
+		put_page(sg_page(sge));
+	memset(sge, 0, sizeof(*sge));	/* entry is cleared in all cases */
+	return len;			/* length before clearing */
+}
+
+/* Free all entries of @msg from index @i on and reset it; returns bytes. */
+static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i,
+			 bool charge)
+{
+	struct scatterlist *elem = sk_msg_elem(msg, i);
+	int total = 0;
+
+	for (; msg->sg.size; elem = sk_msg_elem(msg, i)) {
+		msg->sg.size -= elem->length;
+		total += sk_msg_free_elem(sk, msg, i, charge);
+		sk_msg_iter_var_next(i);
+		sk_msg_check_to_free(msg, i, msg->sg.size);
+	}
+	if (msg->skb)
+		consume_skb(msg->skb);
+	sk_msg_init(msg);
+	return total;
+}
+
+int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg)
+{
+	return __sk_msg_free(sk, msg, msg->sg.start, false); /* no uncharge */
+}
+EXPORT_SYMBOL_GPL(sk_msg_free_nocharge);
+
+int sk_msg_free(struct sock *sk, struct sk_msg *msg)
+{
+	return __sk_msg_free(sk, msg, msg->sg.start, true); /* uncharges @sk */
+}
+EXPORT_SYMBOL_GPL(sk_msg_free);
+
+/* Free the first @bytes of @msg and set sg.start to where freeing stopped. */
+static void __sk_msg_free_partial(struct sock *sk, struct sk_msg *msg,
+				  u32 bytes, bool charge)
+{
+	u32 idx = msg->sg.start;
+
+	while (bytes) {
+		struct scatterlist *elem = sk_msg_elem(msg, idx);
+
+		if (!elem->length)
+			break;
+		if (bytes < elem->length) {
+			if (charge)
+				sk_mem_uncharge(sk, bytes);
+			elem->length -= bytes;
+			elem->offset += bytes;
+			msg->sg.size -= bytes;
+			break;
+		}
+		msg->sg.size -= elem->length;
+		bytes -= elem->length;
+		sk_msg_free_elem(sk, msg, idx, charge);
+		sk_msg_iter_var_next(idx);
+		sk_msg_check_to_free(msg, idx, bytes);
+	}
+	msg->sg.start = idx;
+}
+
+void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes)
+{
+	__sk_msg_free_partial(sk, msg, bytes, true);	/* uncharges @sk */
+}
+EXPORT_SYMBOL_GPL(sk_msg_free_partial);
+
+void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
+				  u32 bytes)
+{
+	__sk_msg_free_partial(sk, msg, bytes, false);	/* no uncharge */
+}
+
+/* Trim @msg down to @len bytes, releasing data from the tail of the ring. */
+void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len)
+{
+	int trim = msg->sg.size - len;
+	u32 i = msg->sg.end;
+
+	if (trim <= 0) {
+		WARN_ON(trim < 0);
+		return;
+	}
+
+	sk_msg_iter_var_prev(i);
+	msg->sg.size = len;
+	while (msg->sg.data[i].length &&
+	       trim >= msg->sg.data[i].length) {
+		trim -= msg->sg.data[i].length;
+		sk_msg_free_elem(sk, msg, i, true);
+		sk_msg_iter_var_prev(i);
+		if (!trim)
+			goto out;
+	}
+
+	msg->sg.data[i].length -= trim;
+	sk_mem_uncharge(sk, trim);
+out:
+	/* If data before the curr pointer was trimmed, update copybreak and
+	 * curr so that future copy operations start at the new location.
+	 * Trimmed data not yet used in a copy op requires no update.
+	 */
+	if (msg->sg.curr >= i) {
+		msg->sg.curr = i;
+		msg->sg.copybreak = msg->sg.data[i].length;
+	}
+	sk_msg_iter_var_next(i);
+	msg->sg.end = i;
+}
+EXPORT_SYMBOL_GPL(sk_msg_trim);
+
+/* Attach the pages iov_iter_get_pages() returns for @from to @msg without
+ * copying. Returns 0 or -EFAULT; on error the @from updates are reverted.
+ */
+int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
+			      struct sk_msg *msg, u32 bytes)
+{
+	int pg, room, ret = 0, used = sk_msg_elem_used(msg);
+	struct page *pages[MAX_MSG_FRAGS];
+	ssize_t orig, copied, use, offset;
+
+	orig = msg->sg.size;
+	while (bytes > 0) {
+		pg = 0;
+		room = (int)MAX_MSG_FRAGS - used;
+		if (room == 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		copied = iov_iter_get_pages(from, pages, bytes, room, &offset);
+		if (copied <= 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		iov_iter_advance(from, copied);
+		bytes -= copied;
+		msg->sg.size += copied;
+
+		while (copied) {
+			use = min_t(int, copied, PAGE_SIZE - offset);
+			sg_set_page(&msg->sg.data[msg->sg.end],
+				    pages[pg], use, offset);
+			sg_unmark_end(&msg->sg.data[msg->sg.end]);
+			sk_mem_charge(sk, use);
+
+			offset = 0;
+			copied -= use;
+			sk_msg_iter_next(msg, end);
+			used++;
+			pg++;
+		}
+		/* A copybreak left by sk_msg_*copy* operations is cleared so
+		 * the zerocopy remainder is preferred when possible.
+		 */
+		msg->sg.copybreak = 0;
+		msg->sg.curr = msg->sg.end;
+	}
+out:
+	/* Revert iov_iter updates, msg will need to use 'trim' later if it
+	 * also needs to be cleared.
+	 */
+	if (ret)
+		iov_iter_revert(from, msg->sg.size - orig);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_zerocopy_from_iter);
+
+/* Copy up to @bytes from @from into @msg's buffers, resuming at sg.curr. */
+int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
+			     struct sk_msg *msg, u32 bytes)
+{
+	int ret = -ENOSPC, idx = msg->sg.curr;
+	struct scatterlist *sge;
+	u32 chunk, room;
+	void *dst;
+
+	do {
+		sge = sk_msg_elem(msg, idx);
+		/* This is possible if a trim operation shrunk the buffer */
+		if (msg->sg.copybreak >= sge->length) {
+			msg->sg.copybreak = 0;
+			sk_msg_iter_var_next(idx);
+			if (idx == msg->sg.end)
+				break;
+			sge = sk_msg_elem(msg, idx);
+		}
+
+		room = sge->length - msg->sg.copybreak;
+		chunk = (room > bytes) ? bytes : room;
+		dst = sg_virt(sge) + msg->sg.copybreak;
+		msg->sg.copybreak += chunk;
+		if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
+			ret = copy_from_iter_nocache(dst, chunk, from);
+		else
+			ret = copy_from_iter(dst, chunk, from);
+		if (ret != chunk) {
+			ret = -EFAULT;	/* short copy */
+			break;
+		}
+		bytes -= chunk;
+		if (!bytes)
+			break;
+		msg->sg.copybreak = 0;
+		sk_msg_iter_var_next(idx);
+	} while (idx != msg->sg.end);
+	msg->sg.curr = idx;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
+
+/* Queue @skb on @psock's ingress list as an sk_msg; returns skb->len. */
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct sock *sk = psock->sk;
+	int copied = 0, num_sge;
+	struct sk_msg *msg;
+
+	if (!sk_rmem_schedule(sk, skb, skb->len))
+		return -EAGAIN;		/* the backlog worker retries later */
+	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+	if (unlikely(!msg))
+		return -EAGAIN;
+
+	sk_msg_init(msg);
+	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+	if (unlikely(num_sge < 0)) {
+		kfree(msg);
+		return num_sge;
+	}
+
+	sk_mem_charge(sk, skb->len);
+	copied = skb->len;
+	msg->sg.start = 0;
+	msg->sg.size = copied;	/* free paths loop on sg.size to uncharge */
+	msg->sg.end = num_sge == MAX_MSG_FRAGS ? 0 : num_sge;
+	msg->skb = skb;
+
+	sk_psock_queue_msg(psock, msg);
+	sk->sk_data_ready(sk);
+	return copied;
+}
+
+/* Deliver @skb locally on ingress, otherwise send it on psock->sk. */
+static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
+			       u32 off, u32 len, bool ingress)
+{
+	if (ingress)
+		return sk_psock_skb_ingress(psock, skb);
+	return skb_send_sock_locked(psock->sk, skb, off, len);
+}
+
+static void sk_psock_backlog(struct work_struct *work)
+{
+	struct sk_psock *psock = container_of(work, struct sk_psock, work);
+	struct sk_psock_work_state *state = &psock->work_state;
+	struct sk_buff *skb;
+	bool ingress;
+	u32 len, off;
+	int ret;
+
+	/* Work handler: process skbs on ingress_skb, a deferred one first.
+	 * Lock sock to avoid losing sk_socket during loop.
+	 */
+	lock_sock(psock->sk);
+	if (state->skb) {
+		skb = state->skb;
+		len = state->len;
+		off = state->off;
+		state->skb = NULL;
+		goto start;
+	}
+	while ((skb = skb_dequeue(&psock->ingress_skb))) {
+		len = skb->len;
+		off = 0;
+start:
+		ingress = tcp_skb_bpf_ingress(skb);
+		do {
+			ret = -EIO;
+			if (likely(psock->sk->sk_socket))
+				ret = sk_psock_handle_skb(psock, skb, off,
+							  len, ingress);
+			if (ret <= 0) {
+				if (ret == -EAGAIN) {
+					state->skb = skb;
+					state->len = len;
+					state->off = off;
+					goto end;
+				}
+				/* Hard errors break pipe and stop xmit. */
+				sk_psock_report_error(psock, ret ? -ret : EPIPE);
+				sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+				kfree_skb(skb);
+				goto end;
+			}
+			off += ret;
+			len -= ret;
+		} while (len);
+		if (!ingress)	/* ingress skbs are attached to the queued msg */
+			kfree_skb(skb);
+	}
+end:
+	release_sock(psock->sk);
+}
+
+/* Allocate a psock for @sk on memory node @node, publish it through the
+ * socket's user data and take a reference on @sk. NULL if allocation fails.
+ */
+struct sk_psock *sk_psock_init(struct sock *sk, int node)
+{
+	struct sk_psock *psock;
+
+	psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node);
+	if (!psock)
+		return NULL;
+
+	psock->sk = sk;
+	psock->eval = __SK_NONE;
+	refcount_set(&psock->refcnt, 1);
+	sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
+
+	INIT_LIST_HEAD(&psock->link);
+	spin_lock_init(&psock->link_lock);
+	INIT_LIST_HEAD(&psock->ingress_msg);
+	skb_queue_head_init(&psock->ingress_skb);
+	INIT_WORK(&psock->work, sk_psock_backlog);
+
+	rcu_assign_sk_user_data(sk, psock);
+	sock_hold(sk);
+	return psock;
+}
+EXPORT_SYMBOL_GPL(sk_psock_init);
+
+/* Detach and return the first link of @psock under link_lock, or NULL. */
+struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
+{
+	struct sk_psock_link *link;
+
+	spin_lock_bh(&psock->link_lock);
+	link = list_first_entry_or_null(&psock->link, typeof(*link), list);
+	if (link)
+		list_del(&link->list);
+	spin_unlock_bh(&psock->link_lock);
+	return link;
+}
+
+void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
+{
+	struct sk_msg *msg, *tmp;
+
+	list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
+		list_del(&msg->list);
+		sk_msg_free(psock->sk, msg);	/* uncharges psock->sk */
+		kfree(msg);			/* the msg container itself */
+	}
+}
+
+static void sk_psock_zap_ingress(struct sk_psock *psock)
+{
+	__skb_queue_purge(&psock->ingress_skb);	/* skbs awaiting backlog */
+	__sk_psock_purge_ingress_msg(psock);	/* queued sk_msgs */
+}
+
+static void sk_psock_link_destroy(struct sk_psock *psock)
+{
+	struct sk_psock_link *link, *tmp;
+
+	list_for_each_entry_safe(link, tmp, &psock->link, list) {
+		list_del(&link->list);	/* link_lock is not taken here */
+		sk_psock_free_link(link);
+	}
+}
+
+/* Work handler finishing psock teardown: parser and backlog work are
+ * stopped, programs, links and queued data released, then refs dropped.
+ */
+static void sk_psock_destroy_deferred(struct work_struct *gc)
+{
+	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
+
+	/* No sk_callback_lock since already detached. */
+	if (psock->parser.enabled)
+		strp_done(&psock->parser.strp);
+	cancel_work_sync(&psock->work);
+	psock_progs_drop(&psock->progs);
+	sk_psock_link_destroy(psock);
+	sk_psock_cork_free(psock);
+	sk_psock_zap_ingress(psock);
+
+	if (psock->sk_redir)
+		sock_put(psock->sk_redir);
+	sock_put(psock->sk);
+	kfree(psock);
+}
+
+void sk_psock_destroy(struct rcu_head *rcu)
+{
+	struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu);
+
+	INIT_WORK(&psock->gc, sk_psock_destroy_deferred);
+	schedule_work(&psock->gc);	/* teardown continues in the worker */
+}
+EXPORT_SYMBOL_GPL(sk_psock_destroy);
+
+/* Detach @psock from @sk and queue its RCU-deferred destruction. */
+void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
+{
+	rcu_assign_sk_user_data(sk, NULL);
+	sk_psock_cork_free(psock);
+	sk_psock_restore_proto(sk, psock);
+	write_lock_bh(&sk->sk_callback_lock);
+	if (psock->progs.skb_parser)
+		sk_psock_stop_strp(sk, psock);
+	write_unlock_bh(&sk->sk_callback_lock);
+	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+
+	call_rcu_sched(&psock->rcu, sk_psock_destroy);
+}
+EXPORT_SYMBOL_GPL(sk_psock_drop);
+
+/* Translate a program verdict into the internal __SK_* code. SK_PASS maps
+ * to __SK_REDIRECT when @redir is set and to __SK_PASS otherwise; every
+ * other value, SK_DROP included, maps to __SK_DROP.
+ */
+static int sk_psock_map_verd(int verdict, bool redir)
+{
+	if (verdict != SK_PASS)
+		return __SK_DROP;
+	if (redir)
+		return __SK_REDIRECT;
+	return __SK_PASS;
+}
+
+/* Run the msg_parser program on @msg; returns the resulting __SK_* code. */
+int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
+			 struct sk_msg *msg)
+{
+	struct bpf_prog *prog;
+	int ret = __SK_PASS;	/* verdict when no program is attached */
+
+	preempt_disable();
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.msg_parser);
+	if (unlikely(!prog))
+		goto out;
+
+	sk_msg_compute_data_pointers(msg);
+	msg->sk = sk;
+	ret = BPF_PROG_RUN(prog, msg);
+	ret = sk_psock_map_verd(ret, msg->sk_redir);
+	psock->apply_bytes = msg->apply_bytes;
+	if (ret != __SK_REDIRECT)
+		goto out;
+
+	if (psock->sk_redir)
+		sock_put(psock->sk_redir);
+	psock->sk_redir = msg->sk_redir;
+	if (!psock->sk_redir) {
+		ret = __SK_DROP;
+		goto out;
+	}
+	sock_hold(psock->sk_redir);	/* ref kept for the cached target */
+out:
+	rcu_read_unlock();
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
+
+/* Run @prog on @skb with skb->sk temporarily pointing at psock->sk. */
+static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
+			    struct sk_buff *skb)
+{
+	int verdict;
+
+	skb->sk = psock->sk;
+	bpf_compute_data_end_sk_skb(skb);
+	preempt_disable();
+	verdict = BPF_PROG_RUN(prog, skb);
+	preempt_enable();
+	/* strparser clones the skb before handing it to an upper layer,
+	 * meaning skb_orphan has been called. Reset sk to NULL on the way
+	 * out so later skb/sk operations cannot hit a BUG_ON(), and because
+	 * this skb's memory is not charged to any socket yet.
+	 */
+	skb->sk = NULL;
+	return verdict;
+}
+
+/* Map @strp back to the psock embedding it (as psock->parser.strp);
+ * @strp must be the strparser of an sk_psock.
+ */
+static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
+{
+	return container_of(strp, struct sk_psock, parser.strp);
+}
+
+/* Queue @skb to its redirect target if possible; otherwise free it. */
+static void sk_psock_verdict_apply(struct sk_psock *psock,
+				   struct sk_buff *skb, int verdict)
+{
+	struct sk_psock *psock_other;
+	struct sock *sk_other;
+	bool ingress;
+
+	switch (verdict) {
+	case __SK_REDIRECT:
+		sk_other = tcp_skb_bpf_redirect_fetch(skb);
+		if (unlikely(!sk_other))
+			goto out_free;
+		psock_other = sk_psock(sk_other);
+		if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
+		    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
+			goto out_free;
+		ingress = tcp_skb_bpf_ingress(skb);
+		if ((!ingress && sock_writeable(sk_other)) ||
+		    (ingress &&
+		     atomic_read(&sk_other->sk_rmem_alloc) <=
+		     sk_other->sk_rcvbuf)) {
+			if (!ingress)
+				skb_set_owner_w(skb, sk_other);
+			skb_queue_tail(&psock_other->ingress_skb, skb);
+			schedule_work(&psock_other->work);
+			break;
+		}
+		/* fall-through */
+	case __SK_DROP:
+	default:
+out_free:
+		kfree_skb(skb);
+	}
+}
+
+static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
+{
+	struct sk_psock *psock = sk_psock_from_strp(strp);
+	struct bpf_prog *prog;
+	int ret = __SK_DROP;	/* verdict when no program is attached */
+
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.skb_verdict);
+	if (likely(prog)) {
+		skb_orphan(skb);
+		tcp_skb_bpf_redirect_clear(skb);
+		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+	}
+	rcu_read_unlock();
+	sk_psock_verdict_apply(psock, skb, ret);	/* queues or frees skb */
+}
+
+static int sk_psock_strp_read_done(struct strparser *strp, int err)
+{
+	return err;	/* error is passed through unchanged */
+}
+
+static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
+{
+	struct sk_psock *psock = sk_psock_from_strp(strp);
+	struct bpf_prog *prog;
+	int ret = skb->len;	/* returned when no parser is attached */
+
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.skb_parser);
+	if (likely(prog))
+		ret = sk_psock_bpf_run(psock, prog, skb);
+	rcu_read_unlock();
+	return ret;
+}
+
+/* sk_data_ready replacement feeding strparser; socket lock is held. */
+static void sk_psock_data_ready(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock)) {
+		write_lock_bh(&sk->sk_callback_lock);
+		strp_data_ready(&psock->parser.strp);
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+	rcu_read_unlock();
+}
+
+static void sk_psock_write_space(struct sock *sk)
+{
+	void (*write_space)(struct sock *sk);	/* callback saved in psock */
+	struct sk_psock *psock;			/* NULL once detached from sk */
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock && sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)))
+		schedule_work(&psock->work);	/* kick the backlog worker */
+	write_space = psock ? psock->saved_write_space : NULL;
+	rcu_read_unlock();
+	if (write_space)	/* never dereference a missing psock */
+		write_space(sk);
+}
+
+int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
+{
+	static const struct strp_callbacks cb = {
+		.rcv_msg	= sk_psock_strp_read,
+		.read_sock_done	= sk_psock_strp_read_done,
+		.parse_msg	= sk_psock_strp_parse,
+	};
+
+	psock->parser.enabled = false;	/* set by sk_psock_start_strp() */
+	return strp_init(&psock->parser.strp, sk, &cb);
+}
+
+void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_parser *parser = &psock->parser;
+
+	if (parser->enabled)	/* callbacks already installed */
+		return;
+
+	parser->saved_data_ready = sk->sk_data_ready;	/* undone on stop */
+	sk->sk_data_ready = sk_psock_data_ready;
+	sk->sk_write_space = sk_psock_write_space;	/* not saved here */
+	parser->enabled = true;
+}
+
+void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_parser *parser = &psock->parser;
+
+	if (!parser->enabled)	/* nothing was installed */
+		return;
+
+	sk->sk_data_ready = parser->saved_data_ready;
+	parser->saved_data_ready = NULL;	/* sk_write_space untouched */
+	strp_stop(&parser->strp);
+	parser->enabled = false;
+}
diff --git a/net/core/sock.c b/net/core/sock.c
index fdf9fc7d3f98..6fcc4bc07d19 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2239,67 +2239,6 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
 
-int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
-		int first_coalesce)
-{
-	int sg_curr = *sg_curr_index, use = 0, rc = 0;
-	unsigned int size = *sg_curr_size;
-	struct page_frag *pfrag;
-	struct scatterlist *sge;
-
-	len -= size;
-	pfrag = sk_page_frag(sk);
-
-	while (len > 0) {
-		unsigned int orig_offset;
-
-		if (!sk_page_frag_refill(sk, pfrag)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		use = min_t(int, len, pfrag->size - pfrag->offset);
-
-		if (!sk_wmem_schedule(sk, use)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		sk_mem_charge(sk, use);
-		size += use;
-		orig_offset = pfrag->offset;
-		pfrag->offset += use;
-
-		sge = sg + sg_curr - 1;
-		if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page &&
-		    sge->offset + sge->length == orig_offset) {
-			sge->length += use;
-		} else {
-			sge = sg + sg_curr;
-			sg_unmark_end(sge);
-			sg_set_page(sge, pfrag->page, use, orig_offset);
-			get_page(pfrag->page);
-			sg_curr++;
-
-			if (sg_curr == MAX_SKB_FRAGS)
-				sg_curr = 0;
-
-			if (sg_curr == sg_start) {
-				rc = -ENOSPC;
-				break;
-			}
-		}
-
-		len -= use;
-	}
-out:
-	*sg_curr_size = size;
-	*sg_curr_index = sg_curr;
-	return rc;
-}
-EXPORT_SYMBOL(sk_alloc_sg);
-
 static void __lock_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
 	__acquires(&sk->sk_lock.slock)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
new file mode 100644
index 000000000000..3c0e44cb811a
--- /dev/null
+++ b/net/core/sock_map.c
@@ -0,0 +1,1002 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/net.h>
+#include <linux/workqueue.h>
+#include <linux/skmsg.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+
+/* Array-backed sock map: a bpf_map header plus a table of socket pointers. */
+struct bpf_stab {
+	struct bpf_map map;		/* generic map; container_of() anchor */
+	struct sock **sks;		/* map.max_entries slots, NULL when empty */
+	struct sk_psock_progs progs;	/* programs attached to this map */
+	raw_spinlock_t lock;		/* serialises writers of sks[] */
+};
+
+/* map_flags accepted at creation by sock_map_alloc() and sock_hash_alloc();
+ * any other bit makes the allocation fail with -EINVAL.
+ */
+#define SOCK_CREATE_FLAG_MASK				\
+	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+/* Allocate an array-backed sock map described by @attr.
+ *
+ * Requires CAP_NET_ADMIN. Keys and values must both be 4 bytes, max_entries
+ * must be non-zero and only SOCK_CREATE_FLAG_MASK flags are accepted.
+ * Returns the embedded bpf_map on success or an ERR_PTR() on failure.
+ */
+static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_stab *stab;
+	u64 cost;
+	int err;
+
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	if (attr->max_entries == 0)
+		return ERR_PTR(-EINVAL);
+	if (attr->key_size != 4 || attr->value_size != 4)
+		return ERR_PTR(-EINVAL);
+	if (attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
+		return ERR_PTR(-EINVAL);
+
+	stab = kzalloc(sizeof(*stab), GFP_USER);
+	if (!stab)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&stab->map, attr);
+	raw_spin_lock_init(&stab->lock);
+
+	/* Make sure page count doesn't overflow. */
+	cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+	if (cost >= U32_MAX - PAGE_SIZE) {
+		err = -EINVAL;
+		goto free_stab;
+	}
+
+	stab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+	err = bpf_map_precharge_memlock(stab->map.pages);
+	if (err)
+		goto free_stab;
+
+	stab->sks = bpf_map_area_alloc(stab->map.max_entries *
+				       sizeof(struct sock *),
+				       stab->map.numa_node);
+	if (!stab->sks) {
+		err = -ENOMEM;
+		goto free_stab;
+	}
+
+	return &stab->map;
+
+free_stab:
+	kfree(stab);
+	return ERR_PTR(err);
+}
+
+/* Resolve attr->target_fd to a map and attach/replace @prog on it for
+ * attr->attach_type. Returns sock_map_prog_update()'s result, or the error
+ * encoded by __bpf_map_get() when the fd does not yield a map.
+ */
+int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	struct bpf_map *map;
+	struct fd f;
+	int err;
+
+	f = fdget(attr->target_fd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = sock_map_prog_update(map, prog, attr->attach_type);
+	fdput(f);
+
+	return err;
+}
+
+/* Enter the context the map_update_elem handlers run their update in: take
+ * the socket lock, then disable preemption, then enter an RCU read-side
+ * section. sock_map_sk_release() undoes these in reverse order.
+ */
+static void sock_map_sk_acquire(struct sock *sk)
+	__acquires(&sk->sk_lock.slock)
+{
+	lock_sock(sk);
+	preempt_disable();
+	rcu_read_lock();
+}
+
+/* Counterpart of sock_map_sk_acquire(): leave the RCU read-side section,
+ * re-enable preemption and release the socket lock, in that order.
+ */
+static void sock_map_sk_release(struct sock *sk)
+	__releases(&sk->sk_lock.slock)
+{
+	rcu_read_unlock();
+	preempt_enable();
+	release_sock(sk);
+}
+
+/* Record on @psock that the socket is referenced from @map.
+ *
+ * @link_raw identifies the referencing entry: callers in this file pass the
+ * address of the array slot (sock map) or of the hash element (sock hash).
+ * The link is appended to psock->link under psock->link_lock.
+ */
+static void sock_map_add_link(struct sk_psock *psock,
+			      struct sk_psock_link *link,
+			      struct bpf_map *map, void *link_raw)
+{
+	link->link_raw = link_raw;
+	link->map = map;
+	spin_lock_bh(&psock->link_lock);
+	list_add_tail(&link->list, &psock->link);
+	spin_unlock_bh(&psock->link_lock);
+}
+
+/* Defined further down; needed here to find a map's programs by map type. */
+static struct sk_psock_progs *sock_map_progs(struct bpf_map *map);
+
+/* Remove from @psock every link whose link_raw equals @link_raw.
+ *
+ * If the parser is enabled and the map owning a removed link has an
+ * skb_parser program, the strparser is stopped afterwards under
+ * sk_callback_lock (outside psock->link_lock).
+ *
+ * Links are created for both map flavours (struct bpf_stab and struct
+ * bpf_htab), and the two structures keep their progs member at different
+ * offsets, so the programs are looked up via sock_map_progs() rather than
+ * by casting every map to struct bpf_stab.
+ */
+static void sock_map_del_link(struct sock *sk,
+			      struct sk_psock *psock, void *link_raw)
+{
+	struct sk_psock_link *link, *tmp;
+	bool strp_stop = false;
+
+	spin_lock_bh(&psock->link_lock);
+	list_for_each_entry_safe(link, tmp, &psock->link, list) {
+		if (link->link_raw == link_raw) {
+			struct sk_psock_progs *progs = sock_map_progs(link->map);
+
+			/* progs is NULL only for a map type this file does
+			 * not handle; treat that as "no parser attached".
+			 */
+			if (psock->parser.enabled && progs &&
+			    progs->skb_parser)
+				strp_stop = true;
+			list_del(&link->list);
+			sk_psock_free_link(link);
+		}
+	}
+	spin_unlock_bh(&psock->link_lock);
+	if (strp_stop) {
+		write_lock_bh(&sk->sk_callback_lock);
+		sk_psock_stop_strp(sk, psock);
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+}
+
+/* Drop the map entry identified by @link_raw from @sk's psock: remove the
+ * matching link(s) and put the psock. No-op when @sk has no psock.
+ */
+static void sock_map_unref(struct sock *sk, void *link_raw)
+{
+	struct sk_psock *psock;
+
+	psock = sk_psock(sk);
+	if (unlikely(!psock))
+		return;
+
+	sock_map_del_link(sk, psock, link_raw);
+	sk_psock_put(sk, psock);
+}
+
+/* Bind @sk to the programs of @map (@progs), creating or reusing its psock.
+ *
+ * Takes references on the map's msg_parser and, only when both are set, on
+ * the skb_parser/skb_verdict pair. A psock is obtained with sk_psock_get()
+ * or freshly created with sk_psock_init(); the programs are installed on it
+ * and, for skb programs, the strparser is initialised and started.
+ *
+ * Returns 0 on success or a negative errno. The error labels drop the
+ * program references taken at the top of the function.
+ */
+static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
+			 struct sock *sk)
+{
+	struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
+	bool skb_progs, sk_psock_is_new = false;
+	struct sk_psock *psock;
+	int ret;
+
+	/* skb programs are only used as a pair: parser and verdict. */
+	skb_verdict = READ_ONCE(progs->skb_verdict);
+	skb_parser = READ_ONCE(progs->skb_parser);
+	skb_progs = skb_parser && skb_verdict;
+	if (skb_progs) {
+		skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
+		if (IS_ERR(skb_verdict))
+			return PTR_ERR(skb_verdict);
+		skb_parser = bpf_prog_inc_not_zero(skb_parser);
+		if (IS_ERR(skb_parser)) {
+			bpf_prog_put(skb_verdict);
+			return PTR_ERR(skb_parser);
+		}
+	}
+
+	msg_parser = READ_ONCE(progs->msg_parser);
+	if (msg_parser) {
+		msg_parser = bpf_prog_inc_not_zero(msg_parser);
+		if (IS_ERR(msg_parser)) {
+			ret = PTR_ERR(msg_parser);
+			goto out;
+		}
+	}
+
+	psock = sk_psock_get(sk);
+	if (psock) {
+		/* NOTE(review): this path leaves without sk_psock_put();
+		 * confirm whether sk_psock_get() took a reference here.
+		 */
+		if (!sk_has_psock(sk)) {
+			ret = -EBUSY;
+			goto out_progs;
+		}
+		/* Refuse to stack a second program of the same kind. */
+		if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
+		    (skb_progs  && READ_ONCE(psock->progs.skb_parser))) {
+			sk_psock_put(sk, psock);
+			ret = -EBUSY;
+			goto out_progs;
+		}
+	} else {
+		psock = sk_psock_init(sk, map->numa_node);
+		if (!psock) {
+			ret = -ENOMEM;
+			goto out_progs;
+		}
+		sk_psock_is_new = true;
+	}
+
+	if (msg_parser)
+		psock_set_prog(&psock->progs.msg_parser, msg_parser);
+	if (sk_psock_is_new) {
+		ret = tcp_bpf_init(sk);
+		if (ret < 0)
+			goto out_drop;
+	} else {
+		tcp_bpf_reinit(sk);
+	}
+
+	/* Parser setup and callback swap happen under sk_callback_lock. */
+	write_lock_bh(&sk->sk_callback_lock);
+	if (skb_progs && !psock->parser.enabled) {
+		ret = sk_psock_init_strp(sk, psock);
+		if (ret) {
+			write_unlock_bh(&sk->sk_callback_lock);
+			goto out_drop;
+		}
+		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+		psock_set_prog(&psock->progs.skb_parser, skb_parser);
+		sk_psock_start_strp(sk, psock);
+	}
+	write_unlock_bh(&sk->sk_callback_lock);
+	return 0;
+	/* NOTE(review): msg_parser may already have been installed on the
+	 * psock via psock_set_prog() when out_drop is reached, and is put
+	 * again at out_progs - confirm who owns that reference.
+	 */
+out_drop:
+	sk_psock_put(sk, psock);
+out_progs:
+	if (msg_parser)
+		bpf_prog_put(msg_parser);
+out:
+	if (skb_progs) {
+		bpf_prog_put(skb_verdict);
+		bpf_prog_put(skb_parser);
+	}
+	return ret;
+}
+
+/* map_free handler for the array-backed sock map.
+ *
+ * After an RCU grace period, every slot is atomically emptied and the socket
+ * found there is unlinked from this map; then the slot array and the map
+ * structure are freed.
+ */
+static void sock_map_free(struct bpf_map *map)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	int i;
+
+	synchronize_rcu();
+	rcu_read_lock();
+	raw_spin_lock_bh(&stab->lock);
+	for (i = 0; i < stab->map.max_entries; i++) {
+		struct sock **psk = &stab->sks[i];
+		struct sock *sk;
+
+		/* Take the socket out of the slot before dropping its link. */
+		sk = xchg(psk, NULL);
+		if (sk)
+			sock_map_unref(sk, psk);
+	}
+	raw_spin_unlock_bh(&stab->lock);
+	rcu_read_unlock();
+
+	bpf_map_area_free(stab->sks);
+	kfree(stab);
+}
+
+/* map_release_uref handler: drop the programs attached to this sock map. */
+static void sock_map_release_progs(struct bpf_map *map)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+
+	psock_progs_drop(&stab->progs);
+}
+
+/* Return the socket stored at index @key, or NULL when the index is out of
+ * range or the slot is empty. Warns once if not called under rcu_read_lock().
+ */
+static struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct bpf_stab *stab;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (likely(key < map->max_entries)) {
+		stab = container_of(map, struct bpf_stab, map);
+		return READ_ONCE(stab->sks[key]);
+	}
+
+	return NULL;
+}
+
+/* map_lookup_elem handler shared by both ops tables in this file: lookups
+ * through this hook are not supported and always yield ERR_PTR(-EOPNOTSUPP).
+ */
+static void *sock_map_lookup(struct bpf_map *map, void *key)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+/* Clear the slot @psk and unlink the socket that was stored there.
+ *
+ * @sk_test: when non-NULL, the slot is only cleared if it still holds this
+ *           socket; when NULL, whatever socket is in the slot is removed.
+ *
+ * Returns 0 when a socket was removed, -EINVAL when the slot was empty or
+ * held a socket other than @sk_test. In the latter case the slot and the
+ * socket in it are left untouched: unlinking a socket that stays in the map
+ * would drop a reference the map still relies on.
+ */
+static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
+			     struct sock **psk)
+{
+	struct sock *sk = NULL;
+
+	raw_spin_lock_bh(&stab->lock);
+	if (!sk_test || sk_test == *psk) {
+		sk = *psk;
+		*psk = NULL;
+	}
+	raw_spin_unlock_bh(&stab->lock);
+	if (unlikely(!sk))
+		return -EINVAL;
+	sock_map_unref(sk, psk);
+	return 0;
+}
+
+/* Remove @sk from a sock map given one of its psock links. @link_raw is the
+ * slot address that sock_map_update_common() recorded in the link; the
+ * result of __sock_map_delete() is ignored.
+ */
+static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk,
+				      void *link_raw)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+
+	__sock_map_delete(stab, sk, link_raw);
+}
+
+/* map_delete_elem handler: remove whatever socket sits at index *@key.
+ * Returns -EINVAL for an out-of-range index, otherwise the result of
+ * __sock_map_delete().
+ */
+static int sock_map_delete_elem(struct bpf_map *map, void *key)
+{
+	u32 idx = *(u32 *)key;
+	struct bpf_stab *stab;
+
+	if (unlikely(idx >= map->max_entries))
+		return -EINVAL;
+
+	stab = container_of(map, struct bpf_stab, map);
+	return __sock_map_delete(stab, NULL, &stab->sks[idx]);
+}
+
+/* map_get_next_key handler: indices are walked in order 0..max_entries-1.
+ * A NULL or out-of-range @key restarts at 0; the last index yields -ENOENT.
+ */
+static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	u32 cur = U32_MAX;
+
+	if (key)
+		cur = *(u32 *)key;
+
+	if (cur == stab->map.max_entries - 1)
+		return -ENOENT;
+
+	*(u32 *)next = cur >= stab->map.max_entries ? 0 : cur + 1;
+	return 0;
+}
+
+/* Store @sk at index @idx of the sock map, honouring BPF_NOEXIST/BPF_EXIST.
+ *
+ * Expects to run under rcu_read_lock() (warns once otherwise). A psock link
+ * is allocated up front, the socket is bound to the map's programs via
+ * sock_map_link(), and the slot is then replaced under stab->lock. A socket
+ * previously in the slot is unlinked.
+ *
+ * Returns 0 on success; -EINVAL for unknown flags, -E2BIG for an index out
+ * of range, -ENOMEM, -EEXIST, -ENOENT, or an error from sock_map_link().
+ */
+static int sock_map_update_common(struct bpf_map *map, u32 idx,
+				  struct sock *sk, u64 flags)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	struct sk_psock_link *link;
+	struct sk_psock *psock;
+	struct sock *osk;
+	int ret;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	if (unlikely(flags > BPF_EXIST))
+		return -EINVAL;
+	if (unlikely(idx >= map->max_entries))
+		return -E2BIG;
+
+	link = sk_psock_init_link();
+	if (!link)
+		return -ENOMEM;
+
+	ret = sock_map_link(map, &stab->progs, sk);
+	if (ret < 0)
+		goto out_free;
+
+	psock = sk_psock(sk);
+	WARN_ON_ONCE(!psock);
+
+	raw_spin_lock_bh(&stab->lock);
+	osk = stab->sks[idx];
+	if (osk && flags == BPF_NOEXIST) {
+		ret = -EEXIST;
+		goto out_unlock;
+	} else if (!osk && flags == BPF_EXIST) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	/* The slot address doubles as the link identifier (link_raw). */
+	sock_map_add_link(psock, link, map, &stab->sks[idx]);
+	stab->sks[idx] = sk;
+	if (osk)
+		sock_map_unref(osk, &stab->sks[idx]);
+	raw_spin_unlock_bh(&stab->lock);
+	return 0;
+out_unlock:
+	raw_spin_unlock_bh(&stab->lock);
+	/* Undo the psock reference obtained through sock_map_link(). */
+	if (psock)
+		sk_psock_put(sk, psock);
+out_free:
+	sk_psock_free_link(link);
+	return ret;
+}
+
+/* True when the sock_ops callback is one of the two "established" events,
+ * the only ones from which the update helpers accept a socket.
+ */
+static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
+{
+	switch (ops->op) {
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Only TCP stream sockets may be stored in these maps. */
+static bool sock_map_sk_is_suitable(const struct sock *sk)
+{
+	if (sk->sk_type != SOCK_STREAM)
+		return false;
+
+	return sk->sk_protocol == IPPROTO_TCP;
+}
+
+/* map_update_elem handler: @value holds a socket fd, @key the array index.
+ *
+ * The fd must refer to an established TCP stream socket; otherwise
+ * -EOPNOTSUPP (or -EINVAL when the socket has no sk) is returned. The actual
+ * update runs inside sock_map_sk_acquire()/sock_map_sk_release().
+ */
+static int sock_map_update_elem(struct bpf_map *map, void *key,
+				void *value, u64 flags)
+{
+	u32 idx = *(u32 *)key;
+	u32 ufd = *(u32 *)value;
+	struct socket *sock;
+	struct sock *sk;
+	int ret;
+
+	sock = sockfd_lookup(ufd, &ret);
+	if (!sock)
+		return ret;
+
+	sk = sock->sk;
+	if (!sk) {
+		ret = -EINVAL;
+	} else if (!sock_map_sk_is_suitable(sk) ||
+		   sk->sk_state != TCP_ESTABLISHED) {
+		ret = -EOPNOTSUPP;
+	} else {
+		sock_map_sk_acquire(sk);
+		ret = sock_map_update_common(map, idx, sk, flags);
+		sock_map_sk_release(sk);
+	}
+
+	/* Drop the file reference taken by sockfd_lookup(). */
+	fput(sock->file);
+	return ret;
+}
+
+/* BPF helper: insert the sock_ops socket into a sock map at index *@key.
+ * Rejected with -EOPNOTSUPP unless the socket is a TCP stream socket and the
+ * current op is an "established" callback.
+ */
+BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, sops,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (unlikely(!sock_map_sk_is_suitable(sops->sk) ||
+		     !sock_map_op_okay(sops)))
+		return -EOPNOTSUPP;
+
+	return sock_map_update_common(map, *(u32 *)key, sops->sk, flags);
+}
+
+/* Prototype of bpf_sock_map_update: (ctx, map pointer, pointer to map key,
+ * flags) -> integer.
+ */
+const struct bpf_func_proto bpf_sock_map_update_proto = {
+	.func		= bpf_sock_map_update,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_PTR_TO_MAP_KEY,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+/* BPF helper: pick the redirect target for @skb from a sock map.
+ * Only BPF_F_INGRESS is accepted in @flags. The flags and the looked-up
+ * socket are stored in the skb's TCP control block; returns SK_PASS when a
+ * socket was found at @key and SK_DROP otherwise.
+ */
+BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
+	   struct bpf_map *, map, u32, key, u64, flags)
+{
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+	struct sock *target;
+
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+
+	tcb->bpf.flags = flags;
+	target = __sock_map_lookup_elem(map, key);
+	tcb->bpf.sk_redir = target;
+
+	return target ? SK_PASS : SK_DROP;
+}
+
+/* Prototype of bpf_sk_redirect_map: (ctx, map pointer, key value, flags)
+ * -> integer. The key is passed by value, not by pointer.
+ */
+const struct bpf_func_proto bpf_sk_redirect_map_proto = {
+	.func           = bpf_sk_redirect_map,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+/* BPF helper: pick the redirect target for @msg from a sock map.
+ * Only BPF_F_INGRESS is accepted in @flags. The flags and the looked-up
+ * socket are stored in @msg; returns SK_PASS when a socket was found at
+ * @key and SK_DROP otherwise.
+ */
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
+	   struct bpf_map *, map, u32, key, u64, flags)
+{
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+
+	msg->flags = flags;
+	msg->sk_redir = __sock_map_lookup_elem(map, key);
+
+	return msg->sk_redir ? SK_PASS : SK_DROP;
+}
+
+/* Prototype of bpf_msg_redirect_map: (ctx, map pointer, key value, flags)
+ * -> integer. The key is passed by value, not by pointer.
+ */
+const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+	.func           = bpf_msg_redirect_map,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+/* Map operations for the array-backed sock map (struct bpf_stab). */
+const struct bpf_map_ops sock_map_ops = {
+	.map_alloc		= sock_map_alloc,
+	.map_free		= sock_map_free,
+	.map_get_next_key	= sock_map_get_next_key,
+	.map_update_elem	= sock_map_update_elem,
+	.map_delete_elem	= sock_map_delete_elem,
+	.map_lookup_elem	= sock_map_lookup,
+	.map_release_uref	= sock_map_release_progs,
+	.map_check_btf		= map_check_no_btf,
+};
+
+/* One sock hash entry: a socket together with the key it is stored under.
+ * Allocated with htab->elem_size bytes so that key[] can hold the map's key.
+ */
+struct bpf_htab_elem {
+	struct rcu_head rcu;		/* for kfree_rcu() in sock_hash_free_elem() */
+	u32 hash;			/* hash of key[], selects the bucket */
+	struct sock *sk;
+	struct hlist_node node;		/* linkage in bpf_htab_bucket.head */
+	u8 key[];			/* C99 flexible array member: key bytes */
+};
+
+/* One hash bucket: a list of bpf_htab_elem plus the lock writers take. */
+struct bpf_htab_bucket {
+	struct hlist_head head;
+	raw_spinlock_t lock;
+};
+
+/* Hash-backed sock map. */
+struct bpf_htab {
+	struct bpf_map map;		/* generic map; container_of() anchor */
+	struct bpf_htab_bucket *buckets;
+	u32 buckets_num;		/* power of two, see sock_hash_alloc() */
+	u32 elem_size;			/* bytes allocated per bpf_htab_elem */
+	struct sk_psock_progs progs;	/* programs attached to this map */
+	atomic_t count;			/* elements currently allocated */
+};
+
+/* Hash @len bytes of @key with jhash(), using a fixed initial value of 0. */
+static inline u32 sock_hash_bucket_hash(const void *key, u32 len)
+{
+	return jhash(key, len, 0);
+}
+
+/* Map @hash onto a bucket by masking with buckets_num - 1. */
+static struct bpf_htab_bucket *sock_hash_select_bucket(struct bpf_htab *htab,
+						       u32 hash)
+{
+	u32 slot = hash & (htab->buckets_num - 1);
+
+	return htab->buckets + slot;
+}
+
+/* Walk one bucket list and return the first element whose stored hash and
+ * key bytes both match, or NULL when there is none.
+ */
+static struct bpf_htab_elem *
+sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
+			  u32 key_size)
+{
+	struct bpf_htab_elem *cur;
+
+	hlist_for_each_entry_rcu(cur, head, node) {
+		/* Cheap hash comparison first, full key compare second. */
+		if (cur->hash != hash)
+			continue;
+		if (memcmp(&cur->key, key, key_size))
+			continue;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/* Return the socket stored under @key, or NULL when the key is absent.
+ * Warns once if not called under rcu_read_lock().
+ */
+static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_elem *found;
+	struct hlist_head *head;
+	u32 hash;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	hash = sock_hash_bucket_hash(key, map->key_size);
+	head = &sock_hash_select_bucket(htab, hash)->head;
+	found = sock_hash_lookup_elem_raw(head, hash, key, map->key_size);
+	if (!found)
+		return NULL;
+
+	return found->sk;
+}
+
+/* Release one element: decrement the map's element count and free the
+ * element through kfree_rcu(). The caller is responsible for unlinking it
+ * from its bucket first.
+ */
+static void sock_hash_free_elem(struct bpf_htab *htab,
+				struct bpf_htab_elem *elem)
+{
+	atomic_dec(&htab->count);
+	kfree_rcu(elem, rcu);
+}
+
+/* Remove a sock hash element given one of the socket's psock links.
+ * @link_raw is the element pointer that sock_hash_update_common() recorded
+ * in the link. Expects rcu_read_lock() to be held (warns once otherwise).
+ */
+static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
+				       void *link_raw)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_elem *elem_probe, *elem = link_raw;
+	struct bpf_htab_bucket *bucket;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	bucket = sock_hash_select_bucket(htab, elem->hash);
+
+	/* elem may be deleted in parallel from the map, but access here
+	 * is okay since it's going away only after RCU grace period.
+	 * However, we need to check whether it's still present.
+	 */
+	raw_spin_lock_bh(&bucket->lock);
+	elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
+					       elem->key, map->key_size);
+	/* Only act if the entry found under this key is this very element. */
+	if (elem_probe && elem_probe == elem) {
+		hlist_del_rcu(&elem->node);
+		sock_map_unref(elem->sk, elem);
+		sock_hash_free_elem(htab, elem);
+	}
+	raw_spin_unlock_bh(&bucket->lock);
+}
+
+/* map_delete_elem handler: unlink, unref and free the element stored under
+ * @key. Returns 0 on success or -ENOENT when the key is not present.
+ */
+static int sock_hash_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_bucket *bucket;
+	struct bpf_htab_elem *victim;
+	int ret = -ENOENT;
+	u32 hash;
+
+	hash = sock_hash_bucket_hash(key, map->key_size);
+	bucket = sock_hash_select_bucket(htab, hash);
+
+	raw_spin_lock_bh(&bucket->lock);
+	victim = sock_hash_lookup_elem_raw(&bucket->head, hash, key,
+					   map->key_size);
+	if (!victim)
+		goto unlock;
+
+	hlist_del_rcu(&victim->node);
+	sock_map_unref(victim->sk, victim);
+	sock_hash_free_elem(htab, victim);
+	ret = 0;
+unlock:
+	raw_spin_unlock_bh(&bucket->lock);
+	return ret;
+}
+
+/* Allocate and fill a new element for @key/@sk.
+ *
+ * The element count is bumped first; exceeding max_entries is tolerated only
+ * when an @old element is being replaced. Returns the element, or
+ * ERR_PTR(-E2BIG) / ERR_PTR(-ENOMEM) with the count restored.
+ */
+static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
+						  void *key, u32 key_size,
+						  u32 hash, struct sock *sk,
+						  struct bpf_htab_elem *old)
+{
+	struct bpf_htab_elem *elem;
+	int err;
+
+	if (atomic_inc_return(&htab->count) > htab->map.max_entries && !old) {
+		err = -E2BIG;
+		goto undo_count;
+	}
+
+	elem = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
+			    htab->map.numa_node);
+	if (!elem) {
+		err = -ENOMEM;
+		goto undo_count;
+	}
+
+	memcpy(elem->key, key, key_size);
+	elem->sk = sk;
+	elem->hash = hash;
+	return elem;
+
+undo_count:
+	atomic_dec(&htab->count);
+	return ERR_PTR(err);
+}
+
+/* Store @sk under @key in the sock hash, honouring BPF_NOEXIST/BPF_EXIST.
+ *
+ * Expects to run under rcu_read_lock() (warns once otherwise). A psock link
+ * is allocated up front, the socket is bound to the map's programs via
+ * sock_map_link(), and the new element is inserted under the bucket lock.
+ * An element previously stored under the same key is unlinked and freed.
+ *
+ * Returns 0 on success; -EINVAL for unknown flags, -ENOMEM, -EEXIST,
+ * -ENOENT, -E2BIG, or an error from sock_map_link().
+ */
+static int sock_hash_update_common(struct bpf_map *map, void *key,
+				   struct sock *sk, u64 flags)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 key_size = map->key_size, hash;
+	struct bpf_htab_elem *elem, *elem_new;
+	struct bpf_htab_bucket *bucket;
+	struct sk_psock_link *link;
+	struct sk_psock *psock;
+	int ret;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	if (unlikely(flags > BPF_EXIST))
+		return -EINVAL;
+
+	link = sk_psock_init_link();
+	if (!link)
+		return -ENOMEM;
+
+	ret = sock_map_link(map, &htab->progs, sk);
+	if (ret < 0)
+		goto out_free;
+
+	psock = sk_psock(sk);
+	WARN_ON_ONCE(!psock);
+
+	hash = sock_hash_bucket_hash(key, key_size);
+	bucket = sock_hash_select_bucket(htab, hash);
+
+	raw_spin_lock_bh(&bucket->lock);
+	elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
+	if (elem && flags == BPF_NOEXIST) {
+		ret = -EEXIST;
+		goto out_unlock;
+	} else if (!elem && flags == BPF_EXIST) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	elem_new = sock_hash_alloc_elem(htab, key, key_size, hash, sk, elem);
+	if (IS_ERR(elem_new)) {
+		ret = PTR_ERR(elem_new);
+		goto out_unlock;
+	}
+
+	/* The element pointer doubles as the link identifier (link_raw). */
+	sock_map_add_link(psock, link, map, elem_new);
+	/* Add new element to the head of the list, so that
+	 * concurrent search will find it before old elem.
+	 */
+	hlist_add_head_rcu(&elem_new->node, &bucket->head);
+	if (elem) {
+		hlist_del_rcu(&elem->node);
+		sock_map_unref(elem->sk, elem);
+		sock_hash_free_elem(htab, elem);
+	}
+	raw_spin_unlock_bh(&bucket->lock);
+	return 0;
+out_unlock:
+	raw_spin_unlock_bh(&bucket->lock);
+	/* Undo the psock reference obtained through sock_map_link(). */
+	sk_psock_put(sk, psock);
+out_free:
+	sk_psock_free_link(link);
+	return ret;
+}
+
+/* map_update_elem handler: @value holds a socket fd, @key the hash key.
+ *
+ * The fd must refer to an established TCP stream socket; otherwise
+ * -EOPNOTSUPP (or -EINVAL when the socket has no sk) is returned. The actual
+ * update runs inside sock_map_sk_acquire()/sock_map_sk_release().
+ */
+static int sock_hash_update_elem(struct bpf_map *map, void *key,
+				 void *value, u64 flags)
+{
+	u32 ufd = *(u32 *)value;
+	struct socket *sock;
+	struct sock *sk;
+	int ret;
+
+	sock = sockfd_lookup(ufd, &ret);
+	if (!sock)
+		return ret;
+
+	sk = sock->sk;
+	if (!sk) {
+		ret = -EINVAL;
+	} else if (!sock_map_sk_is_suitable(sk) ||
+		   sk->sk_state != TCP_ESTABLISHED) {
+		ret = -EOPNOTSUPP;
+	} else {
+		sock_map_sk_acquire(sk);
+		ret = sock_hash_update_common(map, key, sk, flags);
+		sock_map_sk_release(sk);
+	}
+
+	/* Drop the file reference taken by sockfd_lookup(). */
+	fput(sock->file);
+	return ret;
+}
+
+/* map_get_next_key handler for the sock hash.
+ *
+ * With a NULL @key, or a @key that is not in the map, the first element of
+ * the first non-empty bucket is returned. Otherwise the element following
+ * @key in its bucket is returned, falling through to the next non-empty
+ * bucket when @key was the last of its list. Copies key_size bytes into
+ * @key_next and returns 0, or returns -ENOENT when no element follows.
+ */
+static int sock_hash_get_next_key(struct bpf_map *map, void *key,
+				  void *key_next)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_elem *elem, *elem_next;
+	u32 hash, key_size = map->key_size;
+	struct hlist_head *head;
+	int i = 0;
+
+	if (!key)
+		goto find_first_elem;
+	hash = sock_hash_bucket_hash(key, key_size);
+	head = &sock_hash_select_bucket(htab, hash)->head;
+	elem = sock_hash_lookup_elem_raw(head, hash, key, key_size);
+	if (!elem)
+		goto find_first_elem;
+
+	/* Next element in the same bucket, if any. */
+	elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
+				     struct bpf_htab_elem, node);
+	if (elem_next) {
+		memcpy(key_next, elem_next->key, key_size);
+		return 0;
+	}
+
+	/* End of this bucket: continue the scan at the following bucket. */
+	i = hash & (htab->buckets_num - 1);
+	i++;
+find_first_elem:
+	for (; i < htab->buckets_num; i++) {
+		head = &sock_hash_select_bucket(htab, i)->head;
+		elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+					     struct bpf_htab_elem, node);
+		if (elem_next) {
+			memcpy(key_next, elem_next->key, key_size);
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+/* Allocate a hash-backed sock map described by @attr.
+ *
+ * Requires CAP_NET_ADMIN. max_entries and key_size must be non-zero,
+ * value_size must be 4, key_size may not exceed MAX_BPF_STACK and only
+ * SOCK_CREATE_FLAG_MASK flags are accepted. The bucket count is max_entries
+ * rounded up to a power of two. Returns the embedded bpf_map on success or
+ * an ERR_PTR() on failure.
+ */
+static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
+{
+	struct bpf_htab *htab;
+	int i, err;
+	u64 cost;
+
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+	if (attr->max_entries == 0 ||
+	    attr->key_size    == 0 ||
+	    attr->value_size  != 4 ||
+	    attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
+		return ERR_PTR(-EINVAL);
+	if (attr->key_size > MAX_BPF_STACK)
+		return ERR_PTR(-E2BIG);
+
+	htab = kzalloc(sizeof(*htab), GFP_USER);
+	if (!htab)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&htab->map, attr);
+
+	htab->buckets_num = roundup_pow_of_two(htab->map.max_entries);
+	/* Per-element size: header plus the key padded to 8 bytes. */
+	htab->elem_size = sizeof(struct bpf_htab_elem) +
+			  round_up(htab->map.key_size, 8);
+	/* Reject a rounded-up count of 0 or a bucket array above U32_MAX. */
+	if (htab->buckets_num == 0 ||
+	    htab->buckets_num > U32_MAX / sizeof(struct bpf_htab_bucket)) {
+		err = -EINVAL;
+		goto free_htab;
+	}
+
+	/* NOTE(review): unlike sock_map_alloc(), cost is only range-checked
+	 * here; map.pages is not set and no memlock precharge is done.
+	 * Confirm whether that is intended.
+	 */
+	cost = (u64) htab->buckets_num * sizeof(struct bpf_htab_bucket) +
+	       (u64) htab->elem_size * htab->map.max_entries;
+	if (cost >= U32_MAX - PAGE_SIZE) {
+		err = -EINVAL;
+		goto free_htab;
+	}
+
+	htab->buckets = bpf_map_area_alloc(htab->buckets_num *
+					   sizeof(struct bpf_htab_bucket),
+					   htab->map.numa_node);
+	if (!htab->buckets) {
+		err = -ENOMEM;
+		goto free_htab;
+	}
+
+	for (i = 0; i < htab->buckets_num; i++) {
+		INIT_HLIST_HEAD(&htab->buckets[i].head);
+		raw_spin_lock_init(&htab->buckets[i].lock);
+	}
+
+	return &htab->map;
+free_htab:
+	kfree(htab);
+	return ERR_PTR(err);
+}
+
+/* map_free handler for the sock hash.
+ *
+ * After an RCU grace period, every bucket is emptied under its lock: each
+ * element is unlinked, its socket is detached from this map, and the element
+ * itself is released. Finally the bucket array and the map are freed.
+ */
+static void sock_hash_free(struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_bucket *bucket;
+	struct bpf_htab_elem *elem;
+	struct hlist_node *node;
+	int i;
+
+	synchronize_rcu();
+	rcu_read_lock();
+	for (i = 0; i < htab->buckets_num; i++) {
+		bucket = sock_hash_select_bucket(htab, i);
+		raw_spin_lock_bh(&bucket->lock);
+		hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
+			hlist_del_rcu(&elem->node);
+			sock_map_unref(elem->sk, elem);
+			/* Elements come from kmalloc_node() in
+			 * sock_hash_alloc_elem(); without this they would
+			 * be leaked when the map goes away.
+			 */
+			sock_hash_free_elem(htab, elem);
+		}
+		raw_spin_unlock_bh(&bucket->lock);
+	}
+	rcu_read_unlock();
+
+	bpf_map_area_free(htab->buckets);
+	kfree(htab);
+}
+
+/* map_release_uref handler: drop the programs attached to this sock hash. */
+static void sock_hash_release_progs(struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+	psock_progs_drop(&htab->progs);
+}
+
+/* BPF helper: insert the sock_ops socket into a sock hash under @key.
+ * Rejected with -EOPNOTSUPP unless the socket is a TCP stream socket and the
+ * current op is an "established" callback.
+ */
+BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (unlikely(!sock_map_sk_is_suitable(sops->sk) ||
+		     !sock_map_op_okay(sops)))
+		return -EOPNOTSUPP;
+
+	return sock_hash_update_common(map, key, sops->sk, flags);
+}
+
+/* Prototype of bpf_sock_hash_update: (ctx, map pointer, pointer to map key,
+ * flags) -> integer.
+ */
+const struct bpf_func_proto bpf_sock_hash_update_proto = {
+	.func		= bpf_sock_hash_update,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_PTR_TO_MAP_KEY,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+/* BPF helper: pick the redirect target for @skb from a sock hash.
+ * Only BPF_F_INGRESS is accepted in @flags. The flags and the looked-up
+ * socket are stored in the skb's TCP control block; returns SK_PASS when a
+ * socket was found under @key and SK_DROP otherwise.
+ */
+BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+	struct sock *target;
+
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+
+	tcb->bpf.flags = flags;
+	target = __sock_hash_lookup_elem(map, key);
+	tcb->bpf.sk_redir = target;
+
+	return target ? SK_PASS : SK_DROP;
+}
+
+/* Prototype of bpf_sk_redirect_hash: (ctx, map pointer, pointer to map key,
+ * flags) -> integer.
+ */
+const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
+	.func           = bpf_sk_redirect_hash,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_PTR_TO_MAP_KEY,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+/* BPF helper: pick the redirect target for @msg from a sock hash.
+ * Only BPF_F_INGRESS is accepted in @flags. The flags and the looked-up
+ * socket are stored in @msg; returns SK_PASS when a socket was found under
+ * @key and SK_DROP otherwise.
+ */
+BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+
+	msg->flags = flags;
+	msg->sk_redir = __sock_hash_lookup_elem(map, key);
+
+	return msg->sk_redir ? SK_PASS : SK_DROP;
+}
+
+/* Prototype of bpf_msg_redirect_hash: (ctx, map pointer, pointer to map key,
+ * flags) -> integer.
+ */
+const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
+	.func           = bpf_msg_redirect_hash,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_PTR_TO_MAP_KEY,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+/* Map operations for the hash-backed sock map (struct bpf_htab). Lookup
+ * shares sock_map_lookup() with the array-backed map.
+ */
+const struct bpf_map_ops sock_hash_ops = {
+	.map_alloc		= sock_hash_alloc,
+	.map_free		= sock_hash_free,
+	.map_get_next_key	= sock_hash_get_next_key,
+	.map_update_elem	= sock_hash_update_elem,
+	.map_delete_elem	= sock_hash_delete_elem,
+	.map_lookup_elem	= sock_map_lookup,
+	.map_release_uref	= sock_hash_release_progs,
+	.map_check_btf		= map_check_no_btf,
+};
+
+/* Return the program set embedded in @map, selected by map type, or NULL
+ * for a map type this file does not implement.
+ */
+static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
+{
+	if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
+		return &container_of(map, struct bpf_stab, map)->progs;
+
+	if (map->map_type == BPF_MAP_TYPE_SOCKHASH)
+		return &container_of(map, struct bpf_htab, map)->progs;
+
+	return NULL;
+}
+
+/* Install @prog on @map in the slot selected by attach type @which.
+ * Returns 0 on success, or -EOPNOTSUPP when @map is not a sock map/hash or
+ * @which is not one of the three supported attach types.
+ */
+int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+			 u32 which)
+{
+	struct sk_psock_progs *progs = sock_map_progs(map);
+	struct bpf_prog **slot;
+
+	if (!progs)
+		return -EOPNOTSUPP;
+
+	/* Resolve the attach type to the program slot it controls. */
+	switch (which) {
+	case BPF_SK_MSG_VERDICT:
+		slot = &progs->msg_parser;
+		break;
+	case BPF_SK_SKB_STREAM_PARSER:
+		slot = &progs->skb_parser;
+		break;
+	case BPF_SK_SKB_STREAM_VERDICT:
+		slot = &progs->skb_verdict;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	psock_set_prog(slot, prog);
+	return 0;
+}
+
+/* Remove @sk from the map that @link points to, dispatching on the map
+ * type. Links to any other map type are ignored.
+ */
+void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link)
+{
+	struct bpf_map *map = link->map;
+
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_SOCKMAP:
+		sock_map_delete_from_link(map, sk, link->link_raw);
+		break;
+	case BPF_MAP_TYPE_SOCKHASH:
+		sock_hash_delete_from_link(map, sk, link->link_raw);
+		break;
+	default:
+		break;
+	}
+}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 7446b98661d8..58629314eae9 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
new file mode 100644
index 000000000000..80debb0daf37
--- /dev/null
+++ b/net/ipv4/tcp_bpf.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#include <linux/skmsg.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+
+#include <net/inet_common.h>
+
+static bool tcp_bpf_stream_read(const struct sock *sk)
+{	/* True when sk has a psock whose ingress_msg list is non-empty. */
+	bool has_data = false;	/* stays false when no psock is attached */
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock))
+		has_data = !list_empty(&psock->ingress_msg);
+	rcu_read_unlock();
+	return has_data;
+}
+
+static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
+			     int flags, long timeo, int *err)
+{	/* Wait up to timeo for queued data; flags and err are not used here. */
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int ret;	/* sk_wait_event() result, returned unchanged */
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	ret = sk_wait_event(sk, &timeo,	/* condition: either queue has data */
+			    !list_empty(&psock->ingress_msg) ||
+			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return ret;
+}
+
+int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
+		      struct msghdr *msg, int len)
+{	/* Copy up to len bytes from psock->ingress_msg into msg's iterator. */
+	struct iov_iter *iter = &msg->msg_iter;
+	int i, ret, copied = 0;
+
+	while (copied != len) {
+		struct scatterlist *sge;
+		struct sk_msg *msg_rx;
+
+		msg_rx = list_first_entry_or_null(&psock->ingress_msg,
+						  struct sk_msg, list);
+		if (unlikely(!msg_rx))	/* ingress list is empty */
+			break;
+
+		i = msg_rx->sg.start;
+		do {
+			struct page *page;
+			int copy;
+
+			sge = sk_msg_elem(msg_rx, i);
+			copy = sge->length;
+			page = sg_page(sge);
+			if (copied + copy > len)	/* clamp to the bytes still wanted */
+				copy = len - copied;
+			ret = copy_page_to_iter(page, sge->offset, copy, iter);
+			if (ret != copy) {	/* short copy: record position and fail */
+				msg_rx->sg.start = i;
+				return -EFAULT;
+			}
+
+			copied += copy;
+			sge->offset += copy;	/* consume the copied bytes from this element */
+			sge->length -= copy;
+			sk_mem_uncharge(sk, copy);
+			if (!sge->length) {	/* element fully consumed */
+				i++;
+				if (i == MAX_SKB_FRAGS)	/* index wraps to 0 */
+					i = 0;
+				if (!msg_rx->skb)	/* page ref is dropped only when no skb is attached */
+					put_page(page);
+			}
+
+			if (copied == len)
+				break;
+		} while (i != msg_rx->sg.end);
+
+		msg_rx->sg.start = i;
+		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {	/* message fully consumed */
+			list_del(&msg_rx->list);
+			if (msg_rx->skb)
+				consume_skb(msg_rx->skb);
+			kfree(msg_rx);
+		}
+	}
+
+	return copied;	/* bytes copied; the failure path above returns -EFAULT */
+}
+EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg);
+
+int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		    int nonblock, int flags, int *addr_len)
+{	/* recvmsg hook: serve psock data, else wait or defer to tcp_recvmsg(). */
+	struct sk_psock *psock;
+	int copied;
+
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return inet_recv_error(sk, msg, len, addr_len);
+	if (!skb_queue_empty(&sk->sk_receive_queue))	/* plain TCP data pending */
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+	lock_sock(sk);
+msg_bytes_ready:
+	copied = __tcp_bpf_recvmsg(sk, psock, msg, len);
+	if (!copied) {
+		long timeo = sock_rcvtimeo(sk, nonblock);
+		int data, err = 0;
+
+		data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
+		if (data) {
+			if (skb_queue_empty(&sk->sk_receive_queue))
+				goto msg_bytes_ready;
+			release_sock(sk);
+			sk_psock_put(sk, psock);
+			return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+		}
+		/* Nothing arrived. Returning 0 would read as EOF, so do that
+		 * only after the peer shut down; otherwise ask for a retry.
+		 */
+		if (err)
+			copied = err;
+		else
+			copied = (sk->sk_shutdown & RCV_SHUTDOWN) ? 0 : -EAGAIN;
+	}
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return copied;	/* bytes copied, 0 at EOF, or a negative errno */
+}
+
+static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+			   struct sk_msg *msg, u32 apply_bytes, int flags)
+{	/* Transfer msg data into a new sk_msg and queue it on psock; flags unused. */
+	bool apply = apply_bytes;	/* apply_bytes == 0 means no byte limit */
+	struct scatterlist *sge;
+	u32 size, copied = 0;
+	struct sk_msg *tmp;
+	int i, ret = 0;
+
+	tmp = kzalloc(sizeof(*tmp), __GFP_NOWARN | GFP_KERNEL);
+	if (unlikely(!tmp))
+		return -ENOMEM;
+
+	lock_sock(sk);
+	tmp->sg.start = msg->sg.start;
+	i = msg->sg.start;
+	do {
+		sge = sk_msg_elem(msg, i);
+		size = (apply && apply_bytes < sge->length) ?
+			apply_bytes : sge->length;
+		if (!sk_wmem_schedule(sk, size)) {	/* an error only if nothing was moved yet */
+			if (!copied)
+				ret = -ENOMEM;
+			break;
+		}
+
+		sk_mem_charge(sk, size);
+		sk_msg_xfer(tmp, msg, i, size);
+		copied += size;
+		if (sge->length)	/* presumably a partial transfer, so tmp takes its own page ref - confirm in sk_msg_xfer() */
+			get_page(sk_msg_page(tmp, i));
+		sk_msg_iter_var_next(i);
+		tmp->sg.end = i;
+		if (apply) {
+			apply_bytes -= size;
+			if (!apply_bytes)	/* byte budget used up */
+				break;
+		}
+	} while (i != msg->sg.end);
+
+	if (!ret) {
+		msg->sg.start = i;
+		msg->sg.size -= apply_bytes;	/* NOTE(review): subtracts the leftover budget, not copied - confirm intent */
+		sk_psock_queue_msg(psock, tmp);
+		sk->sk_data_ready(sk);
+	} else {
+		sk_msg_free(sk, tmp);
+		kfree(tmp);
+	}
+
+	release_sock(sk);
+	return ret;	/* 0 on success, -ENOMEM otherwise */
+}
+
+static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
+			int flags, bool uncharge)
+{	/* Send msg elements via do_tcp_sendpages(), up to apply_bytes if nonzero. */
+	bool apply = apply_bytes;	/* apply_bytes == 0 means no byte limit */
+	struct scatterlist *sge;
+	struct page *page;
+	int size, ret = 0;
+	u32 off;
+
+	while (1) {
+		sge = sk_msg_elem(msg, msg->sg.start);
+		size = (apply && apply_bytes < sge->length) ?
+			apply_bytes : sge->length;
+		off  = sge->offset;
+		page = sg_page(sge);
+
+		tcp_rate_check_app_limited(sk);
+retry:
+		ret = do_tcp_sendpages(sk, page, off, size, flags);
+		if (ret <= 0)	/* error or nothing sent: returned as-is */
+			return ret;
+		if (apply)
+			apply_bytes -= ret;
+		msg->sg.size -= ret;
+		sge->offset += ret;
+		sge->length -= ret;
+		if (uncharge)
+			sk_mem_uncharge(sk, ret);
+		if (ret != size) {	/* partial send: retry the rest of this element */
+			size -= ret;
+			off  += ret;
+			goto retry;
+		}
+		if (!sge->length) {	/* element done: drop page ref, advance sg.start */
+			put_page(page);
+			sk_msg_iter_next(msg, start);
+			sg_init_table(sge, 1);
+			if (msg->sg.start == msg->sg.end)
+				break;
+		}
+		if (apply && !apply_bytes)	/* byte budget used up */
+			break;
+	}
+
+	return 0;
+}
+
+static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg,
+			       u32 apply_bytes, int flags, bool uncharge)
+{	/* tcp_bpf_push() wrapped in lock_sock()/release_sock() on sk. */
+	int rc;		/* tcp_bpf_push() result, returned after unlocking */
+
+	lock_sock(sk);
+	rc = tcp_bpf_push(sk, msg, apply_bytes, flags, uncharge);
+	release_sock(sk);
+	return rc;
+}
+
+int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
+			  u32 bytes, int flags)
+{	/* Deliver msg to sk: ingress path or TCP push, per sk_msg_to_ingress(). */
+	bool ingress = sk_msg_to_ingress(msg);
+	struct sk_psock *psock = sk_psock_get(sk);
+	int ret;
+
+	if (unlikely(!psock)) {	/* target has no psock: msg is freed, 0 returned */
+		sk_msg_free(sk, msg);
+		return 0;
+	}
+	ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
+			tcp_bpf_push_locked(sk, msg, bytes, flags, false);
+	sk_psock_put(sk, psock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
+
+static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
+				struct sk_msg *msg, int *copied, int flags)
+{	/* Act on the psock verdict for msg; *copied is reduced by bytes freed. */
+	bool cork = false, enospc = msg->sg.start == msg->sg.end;	/* start == end: presumably a full ring */
+	struct sock *sk_redir;
+	u32 tosend;
+	int ret;
+
+more_data:
+	if (psock->eval == __SK_NONE)	/* no cached verdict: ask sk_psock_msg_verdict() */
+		psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+
+	if (msg->cork_bytes &&	/* more bytes wanted than held: stash msg, send nothing */
+	    msg->cork_bytes > msg->sg.size && !enospc) {
+		psock->cork_bytes = msg->cork_bytes - msg->sg.size;
+		if (!psock->cork) {
+			psock->cork = kzalloc(sizeof(*psock->cork),
+					      GFP_ATOMIC | __GFP_NOWARN);
+			if (!psock->cork)
+				return -ENOMEM;
+		}
+		memcpy(psock->cork, msg, sizeof(*msg));
+		return 0;
+	}
+
+	tosend = msg->sg.size;
+	if (psock->apply_bytes && psock->apply_bytes < tosend)	/* cap at apply_bytes */
+		tosend = psock->apply_bytes;
+
+	switch (psock->eval) {
+	case __SK_PASS:	/* push on this socket */
+		ret = tcp_bpf_push(sk, msg, tosend, flags, true);
+		if (unlikely(ret)) {	/* push failed: free msg, subtract freed bytes */
+			*copied -= sk_msg_free(sk, msg);
+			break;
+		}
+		sk_msg_apply_bytes(psock, tosend);
+		break;
+	case __SK_REDIRECT:	/* hand to psock->sk_redir with sk unlocked */
+		sk_redir = psock->sk_redir;
+		sk_msg_apply_bytes(psock, tosend);
+		if (psock->cork) {	/* detach cork from psock; msg is kfree()d below */
+			cork = true;
+			psock->cork = NULL;
+		}
+		sk_msg_return(sk, msg, tosend);
+		release_sock(sk);
+		ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+		lock_sock(sk);
+		if (unlikely(ret < 0)) {
+			int free = sk_msg_free_nocharge(sk, msg);
+
+			if (!cork)
+				*copied -= free;
+		}
+		if (cork) {	/* cork case: msg released here and ret forced to 0 */
+			sk_msg_free(sk, msg);
+			kfree(msg);
+			msg = NULL;
+			ret = 0;
+		}
+		break;
+	case __SK_DROP:
+	default:	/* free tosend bytes and fail with -EACCES */
+		sk_msg_free_partial(sk, msg, tosend);
+		sk_msg_apply_bytes(psock, tosend);
+		*copied -= tosend;
+		return -EACCES;
+	}
+
+	if (likely(!ret)) {
+		if (!psock->apply_bytes) {	/* no apply_bytes left: reset cached verdict */
+			psock->eval =  __SK_NONE;
+			if (psock->sk_redir) {
+				sock_put(psock->sk_redir);
+				psock->sk_redir = NULL;
+			}
+		}
+		if (msg &&	/* data still present at sg.start: go around again */
+		    msg->sg.data[msg->sg.start].page_link &&
+		    msg->sg.data[msg->sg.start].length)
+			goto more_data;
+	}
+	return ret;
+}
+
+static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{	/* sendmsg hook: copy user data into an sk_msg, then run the verdict path. */
+	struct sk_msg tmp, *msg_tx = NULL;
+	int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
+	int copied = 0, err = 0;
+	struct sk_psock *psock;
+	long timeo;
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))	/* no psock: plain tcp_sendmsg() */
+		return tcp_sendmsg(sk, msg, size);
+
+	lock_sock(sk);
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+	while (msg_data_left(msg)) {
+		bool enospc = false;
+		u32 copy, osize;
+
+		if (sk->sk_err) {
+			err = -sk->sk_err;
+			goto out_err;
+		}
+
+		copy = msg_data_left(msg);
+		if (!sk_stream_memory_free(sk))
+			goto wait_for_sndbuf;
+		if (psock->cork) {	/* keep filling the corked msg */
+			msg_tx = psock->cork;
+		} else {
+			msg_tx = &tmp;
+			sk_msg_init(msg_tx);
+		}
+
+		osize = msg_tx->sg.size;	/* size before this round */
+		err = sk_msg_alloc(sk, msg_tx, msg_tx->sg.size + copy, msg_tx->sg.end - 1);
+		if (err) {
+			if (err != -ENOSPC)
+				goto wait_for_memory;
+			enospc = true;	/* -ENOSPC: copy only what sg.size grew by */
+			copy = msg_tx->sg.size - osize;
+		}
+
+		err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx,
+					       copy);
+		if (err < 0) {	/* copy failed: trim back to the earlier size */
+			sk_msg_trim(sk, msg_tx, osize);
+			goto out_err;
+		}
+
+		copied += copy;
+		if (psock->cork_bytes) {
+			if (size > psock->cork_bytes)	/* NOTE(review): compares size, not copy - confirm */
+				psock->cork_bytes = 0;
+			else
+				psock->cork_bytes -= size;
+			if (psock->cork_bytes && !enospc)	/* still corking: return without sending */
+				goto out_err;
+			/* All cork bytes are accounted, rerun the prog. */
+			psock->eval = __SK_NONE;
+			psock->cork_bytes = 0;
+		}
+
+		err = tcp_bpf_send_verdict(sk, psock, msg_tx, &copied, flags);
+		if (unlikely(err < 0))
+			goto out_err;
+		continue;
+wait_for_sndbuf:
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+		err = sk_stream_wait_memory(sk, &timeo);
+		if (err) {
+			if (msg_tx && msg_tx != psock->cork)	/* the corked msg is not freed here */
+				sk_msg_free(sk, msg_tx);
+			goto out_err;
+		}
+	}
+out_err:
+	if (err < 0)
+		err = sk_stream_error(sk, msg->msg_flags, err);
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return copied ? copied : err;	/* a nonzero copied takes precedence over err */
+}
+
+static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset,
+			    size_t size, int flags)
+{	/* sendpage hook: add the page to an sk_msg, then run the verdict path. */
+	struct sk_msg tmp, *msg = NULL;
+	int err = 0, copied = 0;
+	struct sk_psock *psock;
+	bool enospc = false;
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))	/* no psock: plain tcp_sendpage() */
+		return tcp_sendpage(sk, page, offset, size, flags);
+
+	lock_sock(sk);
+	if (psock->cork) {	/* keep filling the corked msg */
+		msg = psock->cork;
+	} else {
+		msg = &tmp;
+		sk_msg_init(msg);
+	}
+
+	/* Catch case where ring is full and sendpage is stalled. */
+	if (unlikely(sk_msg_full(msg)))	/* leaves with copied == 0 and err == 0 */
+		goto out_err;
+
+	sk_msg_page_add(msg, page, size, offset);
+	sk_mem_charge(sk, size);
+	copied = size;
+	if (sk_msg_full(msg))	/* ring filled by this page */
+		enospc = true;
+	if (psock->cork_bytes) {
+		if (size > psock->cork_bytes)
+			psock->cork_bytes = 0;
+		else
+			psock->cork_bytes -= size;
+		if (psock->cork_bytes && !enospc)	/* still corking: return without sending */
+			goto out_err;
+		/* All cork bytes are accounted, rerun the prog. */
+		psock->eval = __SK_NONE;
+		psock->cork_bytes = 0;
+	}
+
+	err = tcp_bpf_send_verdict(sk, psock, msg, &copied, flags);
+out_err:
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return copied ? copied : err;	/* a nonzero copied takes precedence over err */
+}
+
+static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
+{	/* Free cork and ingress state, then unlink and free every psock link. */
+	struct sk_psock_link *link;
+
+	sk_psock_cork_free(psock);
+	__sk_psock_purge_ingress_msg(psock);
+	while ((link = sk_psock_link_pop(psock))) {	/* loop until pop returns NULL */
+		sk_psock_unlink(sk, link);
+		sk_psock_free_link(link);
+	}
+}
+
+static void tcp_bpf_unhash(struct sock *sk)
+{	/* unhash hook: tear down psock state, then call the saved unhash. */
+	void (*saved_unhash)(struct sock *sk);
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock)) {	/* no psock: call sk_prot->unhash if one is set */
+		rcu_read_unlock();
+		if (sk->sk_prot->unhash)
+			sk->sk_prot->unhash(sk);
+		return;
+	}
+
+	saved_unhash = psock->saved_unhash;	/* read before tcp_bpf_remove() runs */
+	tcp_bpf_remove(sk, psock);
+	rcu_read_unlock();
+	saved_unhash(sk);
+}
+
+static void tcp_bpf_close(struct sock *sk, long timeout)
+{	/* close hook: tear down psock state under the sock lock, then saved close. */
+	void (*saved_close)(struct sock *sk, long timeout);
+	struct sk_psock *psock;
+
+	lock_sock(sk);
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock)) {	/* no psock: defer to sk_prot->close */
+		rcu_read_unlock();
+		release_sock(sk);
+		return sk->sk_prot->close(sk, timeout);
+	}
+
+	saved_close = psock->saved_close;	/* read before tcp_bpf_remove() runs */
+	tcp_bpf_remove(sk, psock);
+	rcu_read_unlock();
+	release_sock(sk);	/* saved close is called with the sock lock released */
+	saved_close(sk, timeout);
+}
+
+enum {	/* first index of tcp_bpf_prots: address family */
+	TCP_BPF_IPV4,
+	TCP_BPF_IPV6,
+	TCP_BPF_NUM_PROTS,
+};
+
+enum {	/* second index of tcp_bpf_prots: which hooks are overridden */
+	TCP_BPF_BASE,	/* unhash, close, recvmsg, stream_memory_read */
+	TCP_BPF_TX,	/* BASE plus sendmsg and sendpage */
+	TCP_BPF_NUM_CFGS,
+};
+
+static struct proto *tcpv6_prot_saved __read_mostly;	/* proto the IPv6 row was last built from */
+static DEFINE_SPINLOCK(tcpv6_prot_lock);	/* taken around IPv6 row rebuilds */
+static struct proto tcp_bpf_prots[TCP_BPF_NUM_PROTS][TCP_BPF_NUM_CFGS];
+
+static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
+				   struct proto *base)
+{	/* Fill one row of proto variants from a copy of *base. */
+	prot[TCP_BPF_BASE]			= *base;
+	prot[TCP_BPF_BASE].unhash		= tcp_bpf_unhash;
+	prot[TCP_BPF_BASE].close		= tcp_bpf_close;
+	prot[TCP_BPF_BASE].recvmsg		= tcp_bpf_recvmsg;
+	prot[TCP_BPF_BASE].stream_memory_read	= tcp_bpf_stream_read;
+
+	prot[TCP_BPF_TX]			= prot[TCP_BPF_BASE];	/* TX starts as a copy of BASE */
+	prot[TCP_BPF_TX].sendmsg		= tcp_bpf_sendmsg;
+	prot[TCP_BPF_TX].sendpage		= tcp_bpf_sendpage;
+}
+
+static void tcp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
+{	/* Rebuild the IPv6 row when ops differs from the proto it was built from. */
+	if (sk->sk_family == AF_INET6 &&
+	    unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
+		spin_lock_bh(&tcpv6_prot_lock);
+		if (likely(ops != tcpv6_prot_saved)) {	/* re-check now that the lock is held */
+			tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV6], ops);
+			smp_store_release(&tcpv6_prot_saved, ops);	/* stored after the rebuild */
+		}
+		spin_unlock_bh(&tcpv6_prot_lock);
+	}
+}
+
+static int __init tcp_bpf_v4_build_proto(void)
+{	/* Build the IPv4 row of tcp_bpf_prots from tcp_prot; always returns 0. */
+	tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot);
+	return 0;
+}
+core_initcall(tcp_bpf_v4_build_proto);
+
+static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock)
+{	/* Pick the tcp_bpf_prots entry for sk and pass it to sk_psock_update_proto(). */
+	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;	/* TX only with a msg_parser prog */
+
+	sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]);
+}
+
+static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
+{	/* Point sk->sk_prot at the tcp_bpf_prots entry matching the current progs. */
+	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
+
+	/* Reinit occurs when program types change, e.g. TCP_BPF_TX is removed
+	 * or added, requiring sk_prot hook updates. Only sk_prot is written
+	 * here, so the originally saved hooks are kept.
+	 */
+	sk->sk_prot = &tcp_bpf_prots[family][config];
+}
+
+static int tcp_bpf_assert_proto_ops(struct proto *ops)
+{
+	/* Fallback paths call tcp_recvmsg/tcp_sendmsg/tcp_sendpage directly
+	 * (avoiding retpoline), so reject a proto wired to anything else.
+	 */
+	if (ops->recvmsg != tcp_recvmsg || ops->sendmsg != tcp_sendmsg ||
+	    ops->sendpage != tcp_sendpage)
+		return -ENOTSUPP;
+	return 0;
+}
+
+void tcp_bpf_reinit(struct sock *sk)
+{	/* Re-select sk->sk_prot for the socket's psock. */
+	struct sk_psock *psock;
+
+	sock_owned_by_me(sk);
+
+	rcu_read_lock();
+	psock = sk_psock(sk);	/* NOTE(review): not NULL-checked before use - confirm callers guarantee a psock */
+	tcp_bpf_reinit_sk_prot(sk, psock);
+	rcu_read_unlock();
+}
+
+int tcp_bpf_init(struct sock *sk)
+{	/* Switch sk to the tcp_bpf proto; returns 0 or -EINVAL. */
+	struct proto *ops = READ_ONCE(sk->sk_prot);
+	struct sk_psock *psock;
+
+	sock_owned_by_me(sk);
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock || psock->sk_proto ||	/* no psock, sk_proto already set, or unexpected ops */
+		     tcp_bpf_assert_proto_ops(ops))) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	tcp_bpf_check_v6_needs_rebuild(sk, ops);
+	tcp_bpf_update_sk_prot(sk, psock);
+	rcu_read_unlock();
+	return 0;
+}
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index a5995bb2eaca..a9162aa11af9 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -6,7 +6,7 @@
  *
  */
 
-#include<linux/module.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/list.h>
@@ -29,18 +29,6 @@ static struct tcp_ulp_ops *tcp_ulp_find(const char *name)
 	return NULL;
 }
 
-static struct tcp_ulp_ops *tcp_ulp_find_id(const int ulp)
-{
-	struct tcp_ulp_ops *e;
-
-	list_for_each_entry_rcu(e, &tcp_ulp_list, list) {
-		if (e->uid == ulp)
-			return e;
-	}
-
-	return NULL;
-}
-
 static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
 {
 	const struct tcp_ulp_ops *ulp = NULL;
@@ -63,18 +51,6 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
 	return ulp;
 }
 
-static const struct tcp_ulp_ops *__tcp_ulp_lookup(const int uid)
-{
-	const struct tcp_ulp_ops *ulp;
-
-	rcu_read_lock();
-	ulp = tcp_ulp_find_id(uid);
-	if (!ulp || !try_module_get(ulp->owner))
-		ulp = NULL;
-	rcu_read_unlock();
-	return ulp;
-}
-
 /* Attach new upper layer protocol to the list
  * of available protocols.
  */
@@ -123,6 +99,8 @@ void tcp_cleanup_ulp(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	sock_owned_by_me(sk);
+
 	if (!icsk->icsk_ulp_ops)
 		return;
 
@@ -133,54 +111,35 @@ void tcp_cleanup_ulp(struct sock *sk)
 	icsk->icsk_ulp_ops = NULL;
 }
 
-/* Change upper layer protocol for socket */
-int tcp_set_ulp(struct sock *sk, const char *name)
+static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	const struct tcp_ulp_ops *ulp_ops;
-	int err = 0;
+	int err;
 
+	err = -EEXIST;
 	if (icsk->icsk_ulp_ops)
-		return -EEXIST;
-
-	ulp_ops = __tcp_ulp_find_autoload(name);
-	if (!ulp_ops)
-		return -ENOENT;
-
-	if (!ulp_ops->user_visible) {
-		module_put(ulp_ops->owner);
-		return -ENOENT;
-	}
+		goto out_err;
 
 	err = ulp_ops->init(sk);
-	if (err) {
-		module_put(ulp_ops->owner);
-		return err;
-	}
+	if (err)
+		goto out_err;
 
 	icsk->icsk_ulp_ops = ulp_ops;
 	return 0;
+out_err:
+	module_put(ulp_ops->owner);
+	return err;
 }
 
-int tcp_set_ulp_id(struct sock *sk, int ulp)
+int tcp_set_ulp(struct sock *sk, const char *name)
 {
-	struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_ulp_ops *ulp_ops;
-	int err;
 
-	if (icsk->icsk_ulp_ops)
-		return -EEXIST;
+	sock_owned_by_me(sk);
 
-	ulp_ops = __tcp_ulp_lookup(ulp);
+	ulp_ops = __tcp_ulp_find_autoload(name);
 	if (!ulp_ops)
 		return -ENOENT;
 
-	err = ulp_ops->init(sk);
-	if (err) {
-		module_put(ulp_ops->owner);
-		return err;
-	}
-
-	icsk->icsk_ulp_ops = ulp_ops;
-	return 0;
+	return __tcp_set_ulp(sk, ulp_ops);
 }
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e9c8cfdf4b4c..3f4d61017a69 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -901,6 +901,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 
 static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
 	.inet6_bind = __inet6_bind,
+	.udp6_lib_lookup = __udp6_lib_lookup,
 };
 
 static int __init inet6_init(void)
diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig
index 6cff3f6d0c3a..94da19a2a220 100644
--- a/net/strparser/Kconfig
+++ b/net/strparser/Kconfig
@@ -1,4 +1,2 @@
-
 config STREAM_PARSER
-	tristate
-	default n
+	def_bool n
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index 73f05ece53d0..99c1a19c17b1 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -8,6 +8,7 @@ config TLS
 	select CRYPTO_AES
 	select CRYPTO_GCM
 	select STREAM_PARSER
+	select NET_SOCK_MSG
 	default n
 	---help---
 	Enable kernel support for TLS protocol. This allows symmetric
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 961b07d4d41c..276edbc04f38 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -421,7 +421,7 @@ last_record:
 			tls_push_record_flags = flags;
 			if (more) {
 				tls_ctx->pending_open_record_frags =
-						record->num_frags;
+						!!record->num_frags;
 				break;
 			}
 
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b428069a1b05..e90b6d537077 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -620,12 +620,14 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 	prot[TLS_SW][TLS_BASE].sendpage		= tls_sw_sendpage;
 
 	prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
-	prot[TLS_BASE][TLS_SW].recvmsg		= tls_sw_recvmsg;
-	prot[TLS_BASE][TLS_SW].close		= tls_sk_proto_close;
+	prot[TLS_BASE][TLS_SW].recvmsg		  = tls_sw_recvmsg;
+	prot[TLS_BASE][TLS_SW].stream_memory_read = tls_sw_stream_read;
+	prot[TLS_BASE][TLS_SW].close		  = tls_sk_proto_close;
 
 	prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE];
-	prot[TLS_SW][TLS_SW].recvmsg	= tls_sw_recvmsg;
-	prot[TLS_SW][TLS_SW].close	= tls_sk_proto_close;
+	prot[TLS_SW][TLS_SW].recvmsg		= tls_sw_recvmsg;
+	prot[TLS_SW][TLS_SW].stream_memory_read	= tls_sw_stream_read;
+	prot[TLS_SW][TLS_SW].close		= tls_sk_proto_close;
 
 #ifdef CONFIG_TLS_DEVICE
 	prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
@@ -724,7 +726,6 @@ static int __init tls_register(void)
 	build_protos(tls_prots[TLSV4], &tcp_prot);
 
 	tls_sw_proto_ops = inet_stream_ops;
-	tls_sw_proto_ops.poll = tls_sw_poll;
 	tls_sw_proto_ops.splice_read = tls_sw_splice_read;
 
 #ifdef CONFIG_TLS_DEVICE
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index aa9fdce272b6..a525fc4c2a4b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved.
  * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved.
  * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved.
+ * Copyright (c) 2018, Covalent IO, Inc. http://covalent.io
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -213,153 +214,89 @@ static int tls_do_decryption(struct sock *sk,
 	return ret;
 }
 
-static void trim_sg(struct sock *sk, struct scatterlist *sg,
-		    int *sg_num_elem, unsigned int *sg_size, int target_size)
-{
-	int i = *sg_num_elem - 1;
-	int trim = *sg_size - target_size;
-
-	if (trim <= 0) {
-		WARN_ON(trim < 0);
-		return;
-	}
-
-	*sg_size = target_size;
-	while (trim >= sg[i].length) {
-		trim -= sg[i].length;
-		sk_mem_uncharge(sk, sg[i].length);
-		put_page(sg_page(&sg[i]));
-		i--;
-
-		if (i < 0)
-			goto out;
-	}
-
-	sg[i].length -= trim;
-	sk_mem_uncharge(sk, trim);
-
-out:
-	*sg_num_elem = i + 1;
-}
-
-static void trim_both_sgl(struct sock *sk, int target_size)
+static void tls_trim_both_msgs(struct sock *sk, int target_size)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec;
 
-	trim_sg(sk, &rec->sg_plaintext_data[1],
-		&rec->sg_plaintext_num_elem,
-		&rec->sg_plaintext_size,
-		target_size);
-
+	sk_msg_trim(sk, &rec->msg_plaintext, target_size);
 	if (target_size > 0)
 		target_size += tls_ctx->tx.overhead_size;
-
-	trim_sg(sk, &rec->sg_encrypted_data[1],
-		&rec->sg_encrypted_num_elem,
-		&rec->sg_encrypted_size,
-		target_size);
+	sk_msg_trim(sk, &rec->msg_encrypted, target_size);
 }
 
-static int alloc_encrypted_sg(struct sock *sk, int len)
+static int tls_alloc_encrypted_msg(struct sock *sk, int len)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec;
-	int rc = 0;
-
-	rc = sk_alloc_sg(sk, len,
-			 &rec->sg_encrypted_data[1], 0,
-			 &rec->sg_encrypted_num_elem,
-			 &rec->sg_encrypted_size, 0);
-
-	if (rc == -ENOSPC)
-		rec->sg_encrypted_num_elem =
-			ARRAY_SIZE(rec->sg_encrypted_data) - 1;
+	struct sk_msg *msg_en = &rec->msg_encrypted;
 
-	return rc;
+	return sk_msg_alloc(sk, msg_en, len, 0);
 }
 
-static int move_to_plaintext_sg(struct sock *sk, int required_size)
+static int tls_clone_plaintext_msg(struct sock *sk, int required)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec;
-	struct scatterlist *plain_sg = &rec->sg_plaintext_data[1];
-	struct scatterlist *enc_sg = &rec->sg_encrypted_data[1];
-	int enc_sg_idx = 0;
+	struct sk_msg *msg_pl = &rec->msg_plaintext;
+	struct sk_msg *msg_en = &rec->msg_encrypted;
 	int skip, len;
 
-	if (rec->sg_plaintext_num_elem == MAX_SKB_FRAGS)
-		return -ENOSPC;
-
-	/* We add page references worth len bytes from enc_sg at the
-	 * end of plain_sg. It is guaranteed that sg_encrypted_data
+	/* We add page references worth len bytes from encrypted sg
+	 * at the end of plaintext sg. It is guaranteed that msg_en
 	 * has enough required room (ensured by caller).
 	 */
-	len = required_size - rec->sg_plaintext_size;
+	len = required - msg_pl->sg.size;
 
-	/* Skip initial bytes in sg_encrypted_data to be able
-	 * to use same offset of both plain and encrypted data.
+	/* Skip initial bytes in msg_en's data to be able to use
+	 * same offset of both plain and encrypted data.
 	 */
-	skip = tls_ctx->tx.prepend_size + rec->sg_plaintext_size;
+	skip = tls_ctx->tx.prepend_size + msg_pl->sg.size;
 
-	while (enc_sg_idx < rec->sg_encrypted_num_elem) {
-		if (enc_sg[enc_sg_idx].length > skip)
-			break;
-
-		skip -= enc_sg[enc_sg_idx].length;
-		enc_sg_idx++;
-	}
-
-	/* unmark the end of plain_sg*/
-	sg_unmark_end(plain_sg + rec->sg_plaintext_num_elem - 1);
-
-	while (len) {
-		struct page *page = sg_page(&enc_sg[enc_sg_idx]);
-		int bytes = enc_sg[enc_sg_idx].length - skip;
-		int offset = enc_sg[enc_sg_idx].offset + skip;
-
-		if (bytes > len)
-			bytes = len;
-		else
-			enc_sg_idx++;
+	return sk_msg_clone(sk, msg_pl, msg_en, skip, len);
+}
 
-		/* Skipping is required only one time */
-		skip = 0;
+static struct tls_rec *tls_get_rec(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct sk_msg *msg_pl, *msg_en;
+	struct tls_rec *rec;
+	int mem_size;
 
-		/* Increment page reference */
-		get_page(page);
+	mem_size = sizeof(struct tls_rec) + crypto_aead_reqsize(ctx->aead_send);
 
-		sg_set_page(&plain_sg[rec->sg_plaintext_num_elem], page,
-			    bytes, offset);
+	rec = kzalloc(mem_size, sk->sk_allocation);
+	if (!rec)
+		return NULL;
 
-		sk_mem_charge(sk, bytes);
+	msg_pl = &rec->msg_plaintext;
+	msg_en = &rec->msg_encrypted;
 
-		len -= bytes;
-		rec->sg_plaintext_size += bytes;
+	sk_msg_init(msg_pl);
+	sk_msg_init(msg_en);
 
-		rec->sg_plaintext_num_elem++;
+	sg_init_table(rec->sg_aead_in, 2);
+	sg_set_buf(&rec->sg_aead_in[0], rec->aad_space,
+		   sizeof(rec->aad_space));
+	sg_unmark_end(&rec->sg_aead_in[1]);
 
-		if (rec->sg_plaintext_num_elem == MAX_SKB_FRAGS)
-			return -ENOSPC;
-	}
+	sg_init_table(rec->sg_aead_out, 2);
+	sg_set_buf(&rec->sg_aead_out[0], rec->aad_space,
+		   sizeof(rec->aad_space));
+	sg_unmark_end(&rec->sg_aead_out[1]);
 
-	return 0;
+	return rec;
 }
 
-static void free_sg(struct sock *sk, struct scatterlist *sg,
-		    int *sg_num_elem, unsigned int *sg_size)
+static void tls_free_rec(struct sock *sk, struct tls_rec *rec)
 {
-	int i, n = *sg_num_elem;
-
-	for (i = 0; i < n; ++i) {
-		sk_mem_uncharge(sk, sg[i].length);
-		put_page(sg_page(&sg[i]));
-	}
-	*sg_num_elem = 0;
-	*sg_size = 0;
+	sk_msg_free(sk, &rec->msg_encrypted);
+	sk_msg_free(sk, &rec->msg_plaintext);
+	kfree(rec);
 }
 
 static void tls_free_open_rec(struct sock *sk)
@@ -368,19 +305,10 @@ static void tls_free_open_rec(struct sock *sk)
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec;
 
-	/* Return if there is no open record */
-	if (!rec)
-		return;
-
-	free_sg(sk, &rec->sg_encrypted_data[1],
-		&rec->sg_encrypted_num_elem,
-		&rec->sg_encrypted_size);
-
-	free_sg(sk, &rec->sg_plaintext_data[1],
-		&rec->sg_plaintext_num_elem,
-		&rec->sg_plaintext_size);
-
-	kfree(rec);
+	if (rec) {
+		tls_free_rec(sk, rec);
+		ctx->open_rec = NULL;
+	}
 }
 
 int tls_tx_records(struct sock *sk, int flags)
@@ -388,6 +316,7 @@ int tls_tx_records(struct sock *sk, int flags)
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec, *tmp;
+	struct sk_msg *msg_en;
 	int tx_flags, rc = 0;
 
 	if (tls_is_partially_sent_record(tls_ctx)) {
@@ -407,9 +336,7 @@ int tls_tx_records(struct sock *sk, int flags)
 		 * Remove the head of tx_list
 		 */
 		list_del(&rec->list);
-		free_sg(sk, &rec->sg_plaintext_data[1],
-			&rec->sg_plaintext_num_elem, &rec->sg_plaintext_size);
-
+		sk_msg_free(sk, &rec->msg_plaintext);
 		kfree(rec);
 	}
 
@@ -421,17 +348,15 @@ int tls_tx_records(struct sock *sk, int flags)
 			else
 				tx_flags = flags;
 
+			msg_en = &rec->msg_encrypted;
 			rc = tls_push_sg(sk, tls_ctx,
-					 &rec->sg_encrypted_data[1],
+					 &msg_en->sg.data[msg_en->sg.curr],
 					 0, tx_flags);
 			if (rc)
 				goto tx_err;
 
 			list_del(&rec->list);
-			free_sg(sk, &rec->sg_plaintext_data[1],
-				&rec->sg_plaintext_num_elem,
-				&rec->sg_plaintext_size);
-
+			sk_msg_free(sk, &rec->msg_plaintext);
 			kfree(rec);
 		} else {
 			break;
@@ -451,15 +376,18 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
 	struct sock *sk = req->data;
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct scatterlist *sge;
+	struct sk_msg *msg_en;
 	struct tls_rec *rec;
 	bool ready = false;
 	int pending;
 
 	rec = container_of(aead_req, struct tls_rec, aead_req);
+	msg_en = &rec->msg_encrypted;
 
-	rec->sg_encrypted_data[1].offset -= tls_ctx->tx.prepend_size;
-	rec->sg_encrypted_data[1].length += tls_ctx->tx.prepend_size;
-
+	sge = sk_msg_elem(msg_en, msg_en->sg.curr);
+	sge->offset -= tls_ctx->tx.prepend_size;
+	sge->length += tls_ctx->tx.prepend_size;
 
 	/* Check if error is previously set on socket */
 	if (err || sk->sk_err) {
@@ -497,31 +425,29 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
 
 	/* Schedule the transmission */
 	if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
-		schedule_delayed_work(&ctx->tx_work.work, 2);
+		schedule_delayed_work(&ctx->tx_work.work, 1);
 }
 
 static int tls_do_encryption(struct sock *sk,
 			     struct tls_context *tls_ctx,
 			     struct tls_sw_context_tx *ctx,
 			     struct aead_request *aead_req,
-			     size_t data_len)
+			     size_t data_len, u32 start)
 {
 	struct tls_rec *rec = ctx->open_rec;
-	struct scatterlist *plain_sg = rec->sg_plaintext_data;
-	struct scatterlist *enc_sg = rec->sg_encrypted_data;
+	struct sk_msg *msg_en = &rec->msg_encrypted;
+	struct scatterlist *sge = sk_msg_elem(msg_en, start);
 	int rc;
 
-	/* Skip the first index as it contains AAD data */
-	rec->sg_encrypted_data[1].offset += tls_ctx->tx.prepend_size;
-	rec->sg_encrypted_data[1].length -= tls_ctx->tx.prepend_size;
+	sge->offset += tls_ctx->tx.prepend_size;
+	sge->length -= tls_ctx->tx.prepend_size;
 
-	/* If it is inplace crypto, then pass same SG list as both src, dst */
-	if (rec->inplace_crypto)
-		plain_sg = enc_sg;
+	msg_en->sg.curr = start;
 
 	aead_request_set_tfm(aead_req, ctx->aead_send);
 	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
-	aead_request_set_crypt(aead_req, plain_sg, enc_sg,
+	aead_request_set_crypt(aead_req, rec->sg_aead_in,
+			       rec->sg_aead_out,
 			       data_len, tls_ctx->tx.iv);
 
 	aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
@@ -534,8 +460,8 @@ static int tls_do_encryption(struct sock *sk,
 	rc = crypto_aead_encrypt(aead_req);
 	if (!rc || rc != -EINPROGRESS) {
 		atomic_dec(&ctx->encrypt_pending);
-		rec->sg_encrypted_data[1].offset -= tls_ctx->tx.prepend_size;
-		rec->sg_encrypted_data[1].length += tls_ctx->tx.prepend_size;
+		sge->offset -= tls_ctx->tx.prepend_size;
+		sge->length += tls_ctx->tx.prepend_size;
 	}
 
 	if (!rc) {
@@ -551,177 +477,318 @@ static int tls_do_encryption(struct sock *sk,
 	return rc;
 }
 
+static int tls_split_open_record(struct sock *sk, struct tls_rec *from,
+				 struct tls_rec **to, struct sk_msg *msg_opl,
+				 struct sk_msg *msg_oen, u32 split_point,
+				 u32 tx_overhead_size, u32 *orig_end)
+{
+	u32 i, j, bytes = 0, apply = msg_opl->apply_bytes;
+	struct scatterlist *sge, *osge, *nsge;
+	u32 orig_size = msg_opl->sg.size;
+	struct scatterlist tmp = { };
+	struct sk_msg *msg_npl;
+	struct tls_rec *new;
+	int ret;
+
+	new = tls_get_rec(sk);
+	if (!new)
+		return -ENOMEM;
+	ret = sk_msg_alloc(sk, &new->msg_encrypted, msg_opl->sg.size +
+			   tx_overhead_size, 0);
+	if (ret < 0) {
+		tls_free_rec(sk, new);
+		return ret;
+	}
+
+	*orig_end = msg_opl->sg.end;
+	i = msg_opl->sg.start;
+	sge = sk_msg_elem(msg_opl, i);
+	while (apply && sge->length) {
+		if (sge->length > apply) {
+			u32 len = sge->length - apply;
+
+			get_page(sg_page(sge));
+			sg_set_page(&tmp, sg_page(sge), len,
+				    sge->offset + apply);
+			sge->length = apply;
+			bytes += apply;
+			apply = 0;
+		} else {
+			apply -= sge->length;
+			bytes += sge->length;
+		}
+
+		sk_msg_iter_var_next(i);
+		if (i == msg_opl->sg.end)
+			break;
+		sge = sk_msg_elem(msg_opl, i);
+	}
+
+	msg_opl->sg.end = i;
+	msg_opl->sg.curr = i;
+	msg_opl->sg.copybreak = 0;
+	msg_opl->apply_bytes = 0;
+	msg_opl->sg.size = bytes;
+
+	msg_npl = &new->msg_plaintext;
+	msg_npl->apply_bytes = apply;
+	msg_npl->sg.size = orig_size - bytes;
+
+	j = msg_npl->sg.start;
+	nsge = sk_msg_elem(msg_npl, j);
+	if (tmp.length) {
+		memcpy(nsge, &tmp, sizeof(*nsge));
+		sk_msg_iter_var_next(j);
+		nsge = sk_msg_elem(msg_npl, j);
+	}
+
+	osge = sk_msg_elem(msg_opl, i);
+	while (osge->length) {
+		memcpy(nsge, osge, sizeof(*nsge));
+		sg_unmark_end(nsge);
+		sk_msg_iter_var_next(i);
+		sk_msg_iter_var_next(j);
+		if (i == *orig_end)
+			break;
+		osge = sk_msg_elem(msg_opl, i);
+		nsge = sk_msg_elem(msg_npl, j);
+	}
+
+	msg_npl->sg.end = j;
+	msg_npl->sg.curr = j;
+	msg_npl->sg.copybreak = 0;
+
+	*to = new;
+	return 0;
+}
+
+static void tls_merge_open_record(struct sock *sk, struct tls_rec *to,
+				  struct tls_rec *from, u32 orig_end)
+{
+	struct sk_msg *msg_npl = &from->msg_plaintext;
+	struct sk_msg *msg_opl = &to->msg_plaintext;
+	struct scatterlist *osge, *nsge;
+	u32 i, j;
+
+	i = msg_opl->sg.end;
+	sk_msg_iter_var_prev(i);
+	j = msg_npl->sg.start;
+
+	osge = sk_msg_elem(msg_opl, i);
+	nsge = sk_msg_elem(msg_npl, j);
+
+	if (sg_page(osge) == sg_page(nsge) &&
+	    osge->offset + osge->length == nsge->offset) {
+		osge->length += nsge->length;
+		put_page(sg_page(nsge));
+	}
+
+	msg_opl->sg.end = orig_end;
+	msg_opl->sg.curr = orig_end;
+	msg_opl->sg.copybreak = 0;
+	msg_opl->apply_bytes = msg_opl->sg.size + msg_npl->sg.size;
+	msg_opl->sg.size += msg_npl->sg.size;
+
+	sk_msg_free(sk, &to->msg_encrypted);
+	sk_msg_xfer_full(&to->msg_encrypted, &from->msg_encrypted);
+
+	kfree(from);
+}
+
 static int tls_push_record(struct sock *sk, int flags,
 			   unsigned char record_type)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	struct tls_rec *rec = ctx->open_rec;
+	struct tls_rec *rec = ctx->open_rec, *tmp = NULL;
+	u32 i, split_point, uninitialized_var(orig_end);
+	struct sk_msg *msg_pl, *msg_en;
 	struct aead_request *req;
+	bool split;
 	int rc;
 
 	if (!rec)
 		return 0;
 
+	msg_pl = &rec->msg_plaintext;
+	msg_en = &rec->msg_encrypted;
+
+	split_point = msg_pl->apply_bytes;
+	split = split_point && split_point < msg_pl->sg.size;
+	if (split) {
+		rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en,
+					   split_point, tls_ctx->tx.overhead_size,
+					   &orig_end);
+		if (rc < 0)
+			return rc;
+		sk_msg_trim(sk, msg_en, msg_pl->sg.size +
+			    tls_ctx->tx.overhead_size);
+	}
+
 	rec->tx_flags = flags;
 	req = &rec->aead_req;
 
-	sg_mark_end(rec->sg_plaintext_data + rec->sg_plaintext_num_elem);
-	sg_mark_end(rec->sg_encrypted_data + rec->sg_encrypted_num_elem);
+	i = msg_pl->sg.end;
+	sk_msg_iter_var_prev(i);
+	sg_mark_end(sk_msg_elem(msg_pl, i));
+
+	i = msg_pl->sg.start;
+	sg_chain(rec->sg_aead_in, 2, rec->inplace_crypto ?
+		 &msg_en->sg.data[i] : &msg_pl->sg.data[i]);
+
+	i = msg_en->sg.end;
+	sk_msg_iter_var_prev(i);
+	sg_mark_end(sk_msg_elem(msg_en, i));
 
-	tls_make_aad(rec->aad_space, rec->sg_plaintext_size,
+	i = msg_en->sg.start;
+	sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]);
+
+	tls_make_aad(rec->aad_space, msg_pl->sg.size,
 		     tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
 		     record_type);
 
 	tls_fill_prepend(tls_ctx,
-			 page_address(sg_page(&rec->sg_encrypted_data[1])) +
-			 rec->sg_encrypted_data[1].offset,
-			 rec->sg_plaintext_size, record_type);
-
-	tls_ctx->pending_open_record_frags = 0;
+			 page_address(sg_page(&msg_en->sg.data[i])) +
+			 msg_en->sg.data[i].offset, msg_pl->sg.size,
+			 record_type);
 
-	rc = tls_do_encryption(sk, tls_ctx, ctx, req, rec->sg_plaintext_size);
-	if (rc == -EINPROGRESS)
-		return -EINPROGRESS;
+	tls_ctx->pending_open_record_frags = false;
 
+	rc = tls_do_encryption(sk, tls_ctx, ctx, req, msg_pl->sg.size, i);
 	if (rc < 0) {
-		tls_err_abort(sk, EBADMSG);
+		if (rc != -EINPROGRESS) {
+			tls_err_abort(sk, EBADMSG);
+			if (split) {
+				tls_ctx->pending_open_record_frags = true;
+				tls_merge_open_record(sk, rec, tmp, orig_end);
+			}
+		}
 		return rc;
+	} else if (split) {
+		msg_pl = &tmp->msg_plaintext;
+		msg_en = &tmp->msg_encrypted;
+		sk_msg_trim(sk, msg_en, msg_pl->sg.size +
+			    tls_ctx->tx.overhead_size);
+		tls_ctx->pending_open_record_frags = true;
+		ctx->open_rec = tmp;
 	}
 
 	return tls_tx_records(sk, flags);
 }
 
-static int tls_sw_push_pending_record(struct sock *sk, int flags)
-{
-	return tls_push_record(sk, flags, TLS_RECORD_TYPE_DATA);
-}
-
-static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
-			      int length, int *pages_used,
-			      unsigned int *size_used,
-			      struct scatterlist *to, int to_max_pages,
-			      bool charge)
+static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
+			       bool full_record, u8 record_type,
+			       size_t *copied, int flags)
 {
-	struct page *pages[MAX_SKB_FRAGS];
-
-	size_t offset;
-	ssize_t copied, use;
-	int i = 0;
-	unsigned int size = *size_used;
-	int num_elem = *pages_used;
-	int rc = 0;
-	int maxpages;
-
-	while (length > 0) {
-		i = 0;
-		maxpages = to_max_pages - num_elem;
-		if (maxpages == 0) {
-			rc = -EFAULT;
-			goto out;
-		}
-		copied = iov_iter_get_pages(from, pages,
-					    length,
-					    maxpages, &offset);
-		if (copied <= 0) {
-			rc = -EFAULT;
-			goto out;
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct sk_msg msg_redir = { };
+	struct sk_psock *psock;
+	struct sock *sk_redir;
+	struct tls_rec *rec;
+	int err = 0, send;
+	bool enospc;
+
+	psock = sk_psock_get(sk);
+	if (!psock)
+		return tls_push_record(sk, flags, record_type);
+more_data:
+	enospc = sk_msg_full(msg);
+	if (psock->eval == __SK_NONE)
+		psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+	if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
+	    !enospc && !full_record) {
+		err = -ENOSPC;
+		goto out_err;
+	}
+	msg->cork_bytes = 0;
+	send = msg->sg.size;
+	if (msg->apply_bytes && msg->apply_bytes < send)
+		send = msg->apply_bytes;
+
+	switch (psock->eval) {
+	case __SK_PASS:
+		err = tls_push_record(sk, flags, record_type);
+		if (err < 0) {
+			*copied -= sk_msg_free(sk, msg);
+			tls_free_open_rec(sk);
+			goto out_err;
 		}
-
-		iov_iter_advance(from, copied);
-
-		length -= copied;
-		size += copied;
-		while (copied) {
-			use = min_t(int, copied, PAGE_SIZE - offset);
-
-			sg_set_page(&to[num_elem],
-				    pages[i], use, offset);
-			sg_unmark_end(&to[num_elem]);
-			if (charge)
-				sk_mem_charge(sk, use);
-
-			offset = 0;
-			copied -= use;
-
-			++i;
-			++num_elem;
+		break;
+	case __SK_REDIRECT:
+		sk_redir = psock->sk_redir;
+		memcpy(&msg_redir, msg, sizeof(*msg));
+		if (msg->apply_bytes < send)
+			msg->apply_bytes = 0;
+		else
+			msg->apply_bytes -= send;
+		sk_msg_return_zero(sk, msg, send);
+		msg->sg.size -= send;
+		release_sock(sk);
+		err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags);
+		lock_sock(sk);
+		if (err < 0) {
+			*copied -= sk_msg_free_nocharge(sk, &msg_redir);
+			msg->sg.size = 0;
 		}
+		if (msg->sg.size == 0)
+			tls_free_open_rec(sk);
+		break;
+	case __SK_DROP:
+	default:
+		sk_msg_free_partial(sk, msg, send);
+		if (msg->apply_bytes < send)
+			msg->apply_bytes = 0;
+		else
+			msg->apply_bytes -= send;
+		if (msg->sg.size == 0)
+			tls_free_open_rec(sk);
+		*copied -= send;
+		err = -EACCES;
 	}
 
-	/* Mark the end in the last sg entry if newly added */
-	if (num_elem > *pages_used)
-		sg_mark_end(&to[num_elem - 1]);
-out:
-	if (rc)
-		iov_iter_revert(from, size - *size_used);
-	*size_used = size;
-	*pages_used = num_elem;
+	if (likely(!err)) {
+		bool reset_eval = !ctx->open_rec;
 
-	return rc;
-}
-
-static int memcopy_from_iter(struct sock *sk, struct iov_iter *from,
-			     int bytes)
-{
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	struct tls_rec *rec = ctx->open_rec;
-	struct scatterlist *sg = &rec->sg_plaintext_data[1];
-	int copy, i, rc = 0;
-
-	for (i = tls_ctx->pending_open_record_frags;
-	     i < rec->sg_plaintext_num_elem; ++i) {
-		copy = sg[i].length;
-		if (copy_from_iter(
-				page_address(sg_page(&sg[i])) + sg[i].offset,
-				copy, from) != copy) {
-			rc = -EFAULT;
-			goto out;
+		rec = ctx->open_rec;
+		if (rec) {
+			msg = &rec->msg_plaintext;
+			if (!msg->apply_bytes)
+				reset_eval = true;
 		}
-		bytes -= copy;
-
-		++tls_ctx->pending_open_record_frags;
-
-		if (!bytes)
-			break;
+		if (reset_eval) {
+			psock->eval = __SK_NONE;
+			if (psock->sk_redir) {
+				sock_put(psock->sk_redir);
+				psock->sk_redir = NULL;
+			}
+		}
+		if (rec)
+			goto more_data;
 	}
-
-out:
-	return rc;
+ out_err:
+	sk_psock_put(sk, psock);
+	return err;
 }
 
-static struct tls_rec *get_rec(struct sock *sk)
+static int tls_sw_push_pending_record(struct sock *sk, int flags)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	struct tls_rec *rec;
-	int mem_size;
-
-	/* Return if we already have an open record */
-	if (ctx->open_rec)
-		return ctx->open_rec;
-
-	mem_size = sizeof(struct tls_rec) + crypto_aead_reqsize(ctx->aead_send);
+	struct tls_rec *rec = ctx->open_rec;
+	struct sk_msg *msg_pl;
+	size_t copied;
 
-	rec = kzalloc(mem_size, sk->sk_allocation);
 	if (!rec)
-		return NULL;
-
-	sg_init_table(&rec->sg_plaintext_data[0],
-		      ARRAY_SIZE(rec->sg_plaintext_data));
-	sg_init_table(&rec->sg_encrypted_data[0],
-		      ARRAY_SIZE(rec->sg_encrypted_data));
-
-	sg_set_buf(&rec->sg_plaintext_data[0], rec->aad_space,
-		   sizeof(rec->aad_space));
-	sg_set_buf(&rec->sg_encrypted_data[0], rec->aad_space,
-		   sizeof(rec->aad_space));
+		return 0;
 
-	ctx->open_rec = rec;
-	rec->inplace_crypto = 1;
+	msg_pl = &rec->msg_plaintext;
+	copied = msg_pl->sg.size;
+	if (!copied)
+		return 0;
 
-	return rec;
+	return bpf_exec_tx_verdict(msg_pl, sk, true, TLS_RECORD_TYPE_DATA,
+				   &copied, flags);
 }
 
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
@@ -735,6 +802,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	bool is_kvec = msg->msg_iter.type & ITER_KVEC;
 	bool eor = !(msg->msg_flags & MSG_MORE);
 	size_t try_to_copy, copied = 0;
+	struct sk_msg *msg_pl, *msg_en;
 	struct tls_rec *rec;
 	int required_size;
 	int num_async = 0;
@@ -772,29 +840,35 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 			goto send_end;
 		}
 
-		rec = get_rec(sk);
+		if (ctx->open_rec)
+			rec = ctx->open_rec;
+		else
+			rec = ctx->open_rec = tls_get_rec(sk);
 		if (!rec) {
 			ret = -ENOMEM;
 			goto send_end;
 		}
 
-		orig_size = rec->sg_plaintext_size;
+		msg_pl = &rec->msg_plaintext;
+		msg_en = &rec->msg_encrypted;
+
+		orig_size = msg_pl->sg.size;
 		full_record = false;
 		try_to_copy = msg_data_left(msg);
-		record_room = TLS_MAX_PAYLOAD_SIZE - rec->sg_plaintext_size;
+		record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
 		if (try_to_copy >= record_room) {
 			try_to_copy = record_room;
 			full_record = true;
 		}
 
-		required_size = rec->sg_plaintext_size + try_to_copy +
+		required_size = msg_pl->sg.size + try_to_copy +
 				tls_ctx->tx.overhead_size;
 
 		if (!sk_stream_memory_free(sk))
 			goto wait_for_sndbuf;
 
 alloc_encrypted:
-		ret = alloc_encrypted_sg(sk, required_size);
+		ret = tls_alloc_encrypted_msg(sk, required_size);
 		if (ret) {
 			if (ret != -ENOSPC)
 				goto wait_for_memory;
@@ -803,17 +877,15 @@ alloc_encrypted:
 			 * actually allocated. The difference is due
 			 * to max sg elements limit
 			 */
-			try_to_copy -= required_size - rec->sg_encrypted_size;
+			try_to_copy -= required_size - msg_en->sg.size;
 			full_record = true;
 		}
 
 		if (!is_kvec && (full_record || eor) && !async_capable) {
-			ret = zerocopy_from_iter(sk, &msg->msg_iter,
-				try_to_copy, &rec->sg_plaintext_num_elem,
-				&rec->sg_plaintext_size,
-				&rec->sg_plaintext_data[1],
-				ARRAY_SIZE(rec->sg_plaintext_data) - 1,
-				true);
+			u32 first = msg_pl->sg.end;
+
+			ret = sk_msg_zerocopy_from_iter(sk, &msg->msg_iter,
+							msg_pl, try_to_copy);
 			if (ret)
 				goto fallback_to_reg_send;
 
@@ -821,25 +893,34 @@ alloc_encrypted:
 
 			num_zc++;
 			copied += try_to_copy;
-			ret = tls_push_record(sk, msg->msg_flags, record_type);
+
+			sk_msg_sg_copy_set(msg_pl, first);
+			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
+						  record_type, &copied,
+						  msg->msg_flags);
 			if (ret) {
 				if (ret == -EINPROGRESS)
 					num_async++;
+				else if (ret == -ENOMEM)
+					goto wait_for_memory;
+				else if (ret == -ENOSPC)
+					goto rollback_iter;
 				else if (ret != -EAGAIN)
 					goto send_end;
 			}
 			continue;
-
+rollback_iter:
+			copied -= try_to_copy;
+			sk_msg_sg_copy_clear(msg_pl, first);
+			iov_iter_revert(&msg->msg_iter,
+					msg_pl->sg.size - orig_size);
 fallback_to_reg_send:
-			trim_sg(sk, &rec->sg_plaintext_data[1],
-				&rec->sg_plaintext_num_elem,
-				&rec->sg_plaintext_size,
-				orig_size);
+			sk_msg_trim(sk, msg_pl, orig_size);
 		}
 
-		required_size = rec->sg_plaintext_size + try_to_copy;
+		required_size = msg_pl->sg.size + try_to_copy;
 
-		ret = move_to_plaintext_sg(sk, required_size);
+		ret = tls_clone_plaintext_msg(sk, required_size);
 		if (ret) {
 			if (ret != -ENOSPC)
 				goto send_end;
@@ -848,28 +929,36 @@ fallback_to_reg_send:
 			 * actually allocated. The difference is due
 			 * to max sg elements limit
 			 */
-			try_to_copy -= required_size - rec->sg_plaintext_size;
+			try_to_copy -= required_size - msg_pl->sg.size;
 			full_record = true;
-
-			trim_sg(sk, &rec->sg_encrypted_data[1],
-				&rec->sg_encrypted_num_elem,
-				&rec->sg_encrypted_size,
-				rec->sg_plaintext_size +
-				tls_ctx->tx.overhead_size);
+			sk_msg_trim(sk, msg_en, msg_pl->sg.size +
+				    tls_ctx->tx.overhead_size);
 		}
 
-		ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy);
-		if (ret)
+		ret = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_pl,
+					       try_to_copy);
+		if (ret < 0)
 			goto trim_sgl;
 
+		/* Open records defined only if successfully copied, otherwise
+		 * we would trim the sg but not reset the open record frags.
+		 */
+		tls_ctx->pending_open_record_frags = true;
 		copied += try_to_copy;
 		if (full_record || eor) {
-			ret = tls_push_record(sk, msg->msg_flags, record_type);
+			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
+						  record_type, &copied,
+						  msg->msg_flags);
 			if (ret) {
 				if (ret == -EINPROGRESS)
 					num_async++;
-				else if (ret != -EAGAIN)
+				else if (ret == -ENOMEM)
+					goto wait_for_memory;
+				else if (ret != -EAGAIN) {
+					if (ret == -ENOSPC)
+						ret = 0;
 					goto send_end;
+				}
 			}
 		}
 
@@ -881,11 +970,11 @@ wait_for_memory:
 		ret = sk_stream_wait_memory(sk, &timeo);
 		if (ret) {
 trim_sgl:
-			trim_both_sgl(sk, orig_size);
+			tls_trim_both_msgs(sk, orig_size);
 			goto send_end;
 		}
 
-		if (rec->sg_encrypted_size < required_size)
+		if (msg_en->sg.size < required_size)
 			goto alloc_encrypted;
 	}
 
@@ -928,10 +1017,10 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	unsigned char record_type = TLS_RECORD_TYPE_DATA;
-	size_t orig_size = size;
-	struct scatterlist *sg;
+	struct sk_msg *msg_pl;
 	struct tls_rec *rec;
 	int num_async = 0;
+	size_t copied = 0;
 	bool full_record;
 	int record_room;
 	int ret = 0;
@@ -964,26 +1053,33 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 			goto sendpage_end;
 		}
 
-		rec = get_rec(sk);
+		if (ctx->open_rec)
+			rec = ctx->open_rec;
+		else
+			rec = ctx->open_rec = tls_get_rec(sk);
 		if (!rec) {
 			ret = -ENOMEM;
 			goto sendpage_end;
 		}
 
+		msg_pl = &rec->msg_plaintext;
+
 		full_record = false;
-		record_room = TLS_MAX_PAYLOAD_SIZE - rec->sg_plaintext_size;
+		record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
+		/* Clamp this chunk to the room left in the open record */
 		copy = size;
 		if (copy >= record_room) {
 			copy = record_room;
 			full_record = true;
 		}
-		required_size = rec->sg_plaintext_size + copy +
-			      tls_ctx->tx.overhead_size;
+
+		required_size = msg_pl->sg.size + copy +
+				tls_ctx->tx.overhead_size;
 
 		if (!sk_stream_memory_free(sk))
 			goto wait_for_sndbuf;
 alloc_payload:
-		ret = alloc_encrypted_sg(sk, required_size);
+		ret = tls_alloc_encrypted_msg(sk, required_size);
 		if (ret) {
 			if (ret != -ENOSPC)
 				goto wait_for_memory;
@@ -992,33 +1088,32 @@ alloc_payload:
 			 * actually allocated. The difference is due
 			 * to max sg elements limit
 			 */
-			copy -= required_size - rec->sg_plaintext_size;
+			copy -= required_size - msg_pl->sg.size;
 			full_record = true;
 		}
 
-		get_page(page);
-		sg = &rec->sg_plaintext_data[1] + rec->sg_plaintext_num_elem;
-		sg_set_page(sg, page, copy, offset);
-		sg_unmark_end(sg);
-
-		rec->sg_plaintext_num_elem++;
-
+		sk_msg_page_add(msg_pl, page, copy, offset);
 		sk_mem_charge(sk, copy);
+
 		offset += copy;
 		size -= copy;
-		rec->sg_plaintext_size += copy;
-		tls_ctx->pending_open_record_frags = rec->sg_plaintext_num_elem;
+		copied += copy;
 
-		if (full_record || eor ||
-		    rec->sg_plaintext_num_elem ==
-		    ARRAY_SIZE(rec->sg_plaintext_data) - 1) {
+		tls_ctx->pending_open_record_frags = true;
+		if (full_record || eor || sk_msg_full(msg_pl)) {
 			rec->inplace_crypto = 0;
-			ret = tls_push_record(sk, flags, record_type);
+			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
+						  record_type, &copied, flags);
 			if (ret) {
 				if (ret == -EINPROGRESS)
 					num_async++;
-				else if (ret != -EAGAIN)
+				else if (ret == -ENOMEM)
+					goto wait_for_memory;
+				else if (ret != -EAGAIN) {
+					if (ret == -ENOSPC)
+						ret = 0;
 					goto sendpage_end;
+				}
 			}
 		}
 		continue;
@@ -1027,7 +1122,7 @@ wait_for_sndbuf:
 wait_for_memory:
 		ret = sk_stream_wait_memory(sk, &timeo);
 		if (ret) {
-			trim_both_sgl(sk, rec->sg_plaintext_size);
+			tls_trim_both_msgs(sk, msg_pl->sg.size);
 			goto sendpage_end;
 		}
 
@@ -1042,24 +1137,20 @@ wait_for_memory:
 		}
 	}
 sendpage_end:
-	if (orig_size > size)
-		ret = orig_size - size;
-	else
-		ret = sk_stream_error(sk, flags, ret);
-
+	ret = sk_stream_error(sk, flags, ret);
 	release_sock(sk);
-	return ret;
+	return copied ? copied : ret;
 }
 
-static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
-				     long timeo, int *err)
+static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
+				     int flags, long timeo, int *err)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 	struct sk_buff *skb;
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
-	while (!(skb = ctx->recv_pkt)) {
+	while (!(skb = ctx->recv_pkt) && sk_psock_queue_empty(psock)) {
 		if (sk->sk_err) {
 			*err = sock_error(sk);
 			return NULL;
@@ -1078,7 +1169,10 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
 
 		add_wait_queue(sk_sleep(sk), &wait);
 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-		sk_wait_event(sk, &timeo, ctx->recv_pkt != skb, &wait);
+		sk_wait_event(sk, &timeo,
+			      ctx->recv_pkt != skb ||
+			      !sk_psock_queue_empty(psock),
+			      &wait);
 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		remove_wait_queue(sk_sleep(sk), &wait);
 
@@ -1092,6 +1186,64 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
 	return skb;
 }
 
+static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
+			       int length, int *pages_used,
+			       unsigned int *size_used,
+			       struct scatterlist *to,
+			       int to_max_pages)
+{
+	int rc = 0, i = 0, num_elem = *pages_used, maxpages;
+	struct page *pages[MAX_SKB_FRAGS];
+	unsigned int size = *size_used;
+	ssize_t copied, use;
+	size_t offset;
+
+	while (length > 0) {
+		i = 0;
+		maxpages = to_max_pages - num_elem;
+		if (maxpages == 0) {
+			rc = -EFAULT;
+			goto out;
+		}
+		copied = iov_iter_get_pages(from, pages,
+					    length,
+					    maxpages, &offset);
+		if (copied <= 0) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+		iov_iter_advance(from, copied);
+
+		length -= copied;
+		size += copied;
+		while (copied) {
+			use = min_t(int, copied, PAGE_SIZE - offset);
+
+			sg_set_page(&to[num_elem],
+				    pages[i], use, offset);
+			sg_unmark_end(&to[num_elem]);
+			/* We do not uncharge memory from this API */
+
+			offset = 0;
+			copied -= use;
+
+			i++;
+			num_elem++;
+		}
+	}
+	/* Mark the end in the last sg entry if newly added */
+	if (num_elem > *pages_used)
+		sg_mark_end(&to[num_elem - 1]);
+out:
+	if (rc)
+		iov_iter_revert(from, size - *size_used);
+	*size_used = size;
+	*pages_used = num_elem;
+
+	return rc;
+}
+
 /* This function decrypts the input skb into either out_iov or in out_sg
  * or in skb buffers itself. The input parameter 'zc' indicates if
  * zero-copy mode needs to be tried or not. With zero-copy mode, either
@@ -1189,9 +1341,9 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 			sg_set_buf(&sgout[0], aad, TLS_AAD_SPACE_SIZE);
 
 			*chunk = 0;
-			err = zerocopy_from_iter(sk, out_iov, data_len, &pages,
-						 chunk, &sgout[1],
-						 (n_sgout - 1), false);
+			err = tls_setup_from_iter(sk, out_iov, data_len,
+						  &pages, chunk, &sgout[1],
+						  (n_sgout - 1));
 			if (err < 0)
 				goto fallback_to_reg_recv;
 		} else if (out_sg) {
@@ -1297,6 +1449,7 @@ int tls_sw_recvmsg(struct sock *sk,
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct sk_psock *psock;
 	unsigned char control;
 	struct strp_msg *rxm;
 	struct sk_buff *skb;
@@ -1312,6 +1465,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
 
+	psock = sk_psock_get(sk);
 	lock_sock(sk);
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -1321,9 +1475,19 @@ int tls_sw_recvmsg(struct sock *sk,
 		bool async = false;
 		int chunk = 0;
 
-		skb = tls_wait_data(sk, flags, timeo, &err);
-		if (!skb)
+		skb = tls_wait_data(sk, psock, flags, timeo, &err);
+		if (!skb) {
+			if (psock) {
+				int ret = __tcp_bpf_recvmsg(sk, psock, msg, len);
+
+				if (ret > 0) {
+					copied += ret;
+					len -= ret;
+					continue;
+				}
+			}
 			goto recv_end;
+		}
 
 		rxm = strp_msg(skb);
 
@@ -1429,6 +1593,8 @@ recv_end:
 	}
 
 	release_sock(sk);
+	if (psock)
+		sk_psock_put(sk, psock);
 	return copied ? : err;
 }
 
@@ -1451,7 +1617,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
-	skb = tls_wait_data(sk, flags, timeo, &err);
+	skb = tls_wait_data(sk, NULL, flags, timeo, &err);
 	if (!skb)
 		goto splice_read_end;
 
@@ -1485,23 +1651,20 @@ splice_read_end:
 	return copied ? : err;
 }
 
-unsigned int tls_sw_poll(struct file *file, struct socket *sock,
-			 struct poll_table_struct *wait)
+bool tls_sw_stream_read(const struct sock *sk)
 {
-	unsigned int ret;
-	struct sock *sk = sock->sk;
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	bool ingress_empty = true;
+	struct sk_psock *psock;
 
-	/* Grab POLLOUT and POLLHUP from the underlying socket */
-	ret = ctx->sk_poll(file, sock, wait);
-
-	/* Clear POLLIN bits, and set based on recv_pkt */
-	ret &= ~(POLLIN | POLLRDNORM);
-	if (ctx->recv_pkt)
-		ret |= POLLIN | POLLRDNORM;
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (psock)
+		ingress_empty = list_empty(&psock->ingress_msg);
+	rcu_read_unlock();
 
-	return ret;
+	return !ingress_empty || ctx->recv_pkt;
 }
 
 static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
@@ -1580,8 +1743,15 @@ static void tls_data_ready(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct sk_psock *psock;
 
 	strp_data_ready(&ctx->strp);
+
+	psock = sk_psock_get(sk);
+	if (psock && !list_empty(&psock->ingress_msg))
+		ctx->saved_data_ready(sk);
+	if (psock)
+		sk_psock_put(sk, psock);
 }
 
 void tls_sw_free_resources_tx(struct sock *sk)
@@ -1619,25 +1789,15 @@ void tls_sw_free_resources_tx(struct sock *sk)
 
 		rec = list_first_entry(&ctx->tx_list,
 				       struct tls_rec, list);
-
-		free_sg(sk, &rec->sg_plaintext_data[1],
-			&rec->sg_plaintext_num_elem,
-			&rec->sg_plaintext_size);
-
 		list_del(&rec->list);
+		sk_msg_free(sk, &rec->msg_plaintext);
 		kfree(rec);
 	}
 
 	list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) {
-		free_sg(sk, &rec->sg_encrypted_data[1],
-			&rec->sg_encrypted_num_elem,
-			&rec->sg_encrypted_size);
-
-		free_sg(sk, &rec->sg_plaintext_data[1],
-			&rec->sg_plaintext_num_elem,
-			&rec->sg_plaintext_size);
-
 		list_del(&rec->list);
+		sk_msg_free(sk, &rec->msg_encrypted);
+		sk_msg_free(sk, &rec->msg_plaintext);
 		kfree(rec);
 	}
 
@@ -1829,8 +1989,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		sk->sk_data_ready = tls_data_ready;
 		write_unlock_bh(&sk->sk_callback_lock);
 
-		sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll;
-
 		strp_check_rcv(&sw_ctx_rx->strp);
 	}
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index a6258bc8ec4f..3497f2d80328 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -15,13 +15,15 @@ SYNOPSIS
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
 
 	*COMMANDS* :=
-	{ **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
-	| **pin** | **help** }
+	{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
+	| **delete** | **pin** | **help** }
 
 MAP COMMANDS
 =============
 
 |	**bpftool** **map { show | list }**   [*MAP*]
+|	**bpftool** **map create**     *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
+|		**entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
 |	**bpftool** **map dump**       *MAP*
 |	**bpftool** **map update**     *MAP*  **key** *DATA*   **value** *VALUE* [*UPDATE_FLAGS*]
 |	**bpftool** **map lookup**     *MAP*  **key** *DATA*
@@ -36,6 +38,11 @@ MAP COMMANDS
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
 |	*VALUE* := { *DATA* | *MAP* | *PROG* }
 |	*UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
+|	*TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash**
+|		| **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash**
+|		| **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
+|		| **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
+|		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** }
 
 DESCRIPTION
 ===========
@@ -47,6 +54,10 @@ DESCRIPTION
 		  Output will start with map ID followed by map type and
 		  zero or more named attributes (depending on kernel version).
 
+	**bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
+		  Create a new map with given parameters and pin it to *bpffs*
+		  as *FILE*.
+
 	**bpftool map dump**    *MAP*
 		  Dump all entries in a given *MAP*.
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 64156a16d530..12c803003ab2 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -25,6 +25,8 @@ MAP COMMANDS
 |	**bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |	**bpftool** **prog pin** *PROG* *FILE*
 |	**bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
+|	**bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP*
+|	**bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP*
 |	**bpftool** **prog help**
 |
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@@ -37,6 +39,7 @@ MAP COMMANDS
 |		**cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
 |		**cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6**
 |	}
+|	*ATTACH_TYPE* := { **msg_verdict** | **skb_verdict** | **skb_parse** }
 
 
 DESCRIPTION
@@ -90,6 +93,14 @@ DESCRIPTION
 
 		  Note: *FILE* must be located in *bpffs* mount.
 
+	**bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP*
+		  Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
+		  to the map *MAP*.
+
+	**bpftool prog detach** *PROG* *ATTACH_TYPE* *MAP*
+		  Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
+		  from the map *MAP*.
+
 	**bpftool prog help**
 		  Print short help message.
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 8dda77daeda9..04cd4f92ab89 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -22,11 +22,11 @@ SYNOPSIS
 	| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
 
 	*MAP-COMMANDS* :=
-	{ **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
-	| **pin** | **event_pipe** | **help** }
+	{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
+	| **delete** | **pin** | **event_pipe** | **help** }
 
 	*PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
-	| **load** | **help** }
+	| **load** | **attach** | **detach** | **help** }
 
 	*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
 
@@ -57,6 +57,10 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-m, --mapcompat
+		  Allow loading maps with unknown map definitions.
+
+
 SEE ALSO
 ========
 	**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 74288a2197ab..dac7eff4c7e5 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -46,6 +46,13 @@ CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
 	-I$(srctree)/tools/lib/bpf \
 	-I$(srctree)/tools/perf
 CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
+ifneq ($(EXTRA_CFLAGS),)
+CFLAGS += $(EXTRA_CFLAGS)
+endif
+ifneq ($(EXTRA_LDFLAGS),)
+LDFLAGS += $(EXTRA_LDFLAGS)
+endif
+
 LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
 
 INSTALL ?= install
@@ -90,7 +97,7 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
 
 $(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
 
 $(OUTPUT)%.o: %.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index df1060b852c1..c56545e87b0d 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -184,7 +184,7 @@ _bpftool()
 
     # Deal with options
     if [[ ${words[cword]} == -* ]]; then
-        local c='--version --json --pretty --bpffs'
+        local c='--version --json --pretty --bpffs --mapcompat'
         COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
         return 0
     fi
@@ -292,6 +292,23 @@ _bpftool()
                     fi
                     return 0
                     ;;
+                attach|detach)
+                    if [[ ${#words[@]} == 7 ]]; then
+                        COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) )
+                        return 0
+                    fi
+
+                    if [[ ${#words[@]} == 6 ]]; then
+                        COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse" -- "$cur" ) )
+                        return 0
+                    fi
+
+                    if [[ $prev == "$command" ]]; then
+                        COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) )
+                        return 0
+                    fi
+                    return 0
+                    ;;
                 load)
                     local obj
 
@@ -347,7 +364,7 @@ _bpftool()
                     ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'dump help pin load \
+                        COMPREPLY=( $( compgen -W 'dump help pin attach detach load \
                             show list' -- "$cur" ) )
                     ;;
             esac
@@ -370,6 +387,42 @@ _bpftool()
                             ;;
                     esac
                     ;;
+                create)
+                    case $prev in
+                        $command)
+                            _filedir
+                            return 0
+                            ;;
+                        type)
+                            COMPREPLY=( $( compgen -W 'hash array prog_array \
+                                perf_event_array percpu_hash percpu_array \
+                                stack_trace cgroup_array lru_hash \
+                                lru_percpu_hash lpm_trie array_of_maps \
+                                hash_of_maps devmap sockmap cpumap xskmap \
+                                sockhash cgroup_storage reuseport_sockarray \
+                                percpu_cgroup_storage' -- \
+                                                   "$cur" ) )
+                            return 0
+                            ;;
+                        key|value|flags|name|entries)
+                            return 0
+                            ;;
+                        dev)
+                            _sysfs_get_netdevs
+                            return 0
+                            ;;
+                        *)
+                            _bpftool_once_attr 'type'
+                            _bpftool_once_attr 'key'
+                            _bpftool_once_attr 'value'
+                            _bpftool_once_attr 'entries'
+                            _bpftool_once_attr 'name'
+                            _bpftool_once_attr 'flags'
+                            _bpftool_once_attr 'dev'
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 lookup|getnext|delete)
                     case $prev in
                         $command)
@@ -483,7 +536,7 @@ _bpftool()
                 *)
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'delete dump getnext help \
-                            lookup pin event_pipe show list update' -- \
+                            lookup pin event_pipe show list update create' -- \
                             "$cur" ) )
                     ;;
             esac
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b3a0709ea7ed..3318da8060bd 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -618,3 +618,37 @@ void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
 		jsonw_string_field(json_wtr, "ifname", name);
 	jsonw_end_object(json_wtr);
 }
+
+/* Parse the word following the current argument as a u32 into *val.
+ *
+ * NEXT_ARGP() is applied once before the number is read and once after it,
+ * so the caller is expected to invoke this while *argv is the keyword.  A
+ * non-zero *val on entry is taken to mean the option was already given.
+ *
+ * Returns 0 on success, -1 after reporting the problem through p_err().
+ */
+int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what)
+{
+	unsigned long parsed;
+	char *endptr;
+
+	NEXT_ARGP();
+
+	if (*val) {
+		p_err("%s already specified", what);
+		return -1;
+	}
+
+	parsed = strtoul(**argv, &endptr, 0);
+	/* Reject empty words, trailing junk and any value that would be
+	 * silently truncated when narrowed to 32 bits.
+	 */
+	if (endptr == **argv || *endptr || parsed != (__u32)parsed) {
+		p_err("can't parse %s as %s", **argv, what);
+		return -1;
+	}
+	*val = parsed;
+	NEXT_ARGP();
+
+	return 0;
+}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 79dc3f193547..828dde30e9ec 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -55,6 +55,7 @@ json_writer_t *json_wtr;
 bool pretty_output;
 bool json_output;
 bool show_pinned;
+int bpf_flags;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
 
@@ -341,6 +342,7 @@ int main(int argc, char **argv)
 		{ "pretty",	no_argument,	NULL,	'p' },
 		{ "version",	no_argument,	NULL,	'V' },
 		{ "bpffs",	no_argument,	NULL,	'f' },
+		{ "mapcompat",	no_argument,	NULL,	'm' },
 		{ 0 }
 	};
 	int opt, ret;
@@ -355,7 +357,7 @@ int main(int argc, char **argv)
 	hash_init(map_table.table);
 
 	opterr = 0;
-	while ((opt = getopt_long(argc, argv, "Vhpjf",
+	while ((opt = getopt_long(argc, argv, "Vhpjfm",
 				  options, NULL)) >= 0) {
 		switch (opt) {
 		case 'V':
@@ -379,6 +381,9 @@ int main(int argc, char **argv)
 		case 'f':
 			show_pinned = true;
 			break;
+		case 'm':
+			bpf_flags = MAPS_RELAX_COMPAT;
+			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
 			if (json_output)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 40492cdc4e53..28ee769bd11b 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -74,7 +74,7 @@
 #define HELP_SPEC_PROGRAM						\
 	"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }"
 #define HELP_SPEC_OPTIONS						\
-	"OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} }"
+	"OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} | {-m|--mapcompat} }"
 #define HELP_SPEC_MAP							\
 	"MAP := { id MAP_ID | pinned FILE }"
 
@@ -89,6 +89,7 @@ extern const char *bin_name;
 extern json_writer_t *json_wtr;
 extern bool json_output;
 extern bool show_pinned;
+extern int bpf_flags;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
 
@@ -138,6 +139,7 @@ int do_cgroup(int argc, char **arg);
 int do_perf(int argc, char **arg);
 int do_net(int argc, char **arg);
 
+int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
 int map_parse_fd(int *argc, char ***argv);
 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 6003e9598973..7bf38f0e152e 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -36,6 +36,7 @@
 #include <fcntl.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
+#include <net/if.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -94,6 +95,25 @@ static bool map_is_map_of_progs(__u32 type)
 	return type == BPF_MAP_TYPE_PROG_ARRAY;
 }
 
+/* Translate a map type keyword into its index in map_type_name[].
+ *
+ * Returns the index of the entry equal to @type, or -1 when there is none.
+ */
+static int map_type_from_str(const char *type)
+{
+	unsigned int idx = 0;
+
+	while (idx < ARRAY_SIZE(map_type_name)) {
+		const char *name = map_type_name[idx];
+
+		/* Don't allow prefixing in case of possible future shadowing */
+		if (name && strcmp(name, type) == 0)
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
 static void *alloc_value(struct bpf_map_info *info)
 {
 	if (map_is_per_cpu(info->type))
@@ -336,6 +348,30 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
 	jsonw_end_object(json_wtr);
 }
 
+/* Print a map key as hex together with a text message in place of its value.
+ *
+ * Key and message share one line only when neither is longer than 16 bytes
+ * and together they take at most 24; beyond 16 bytes the "key:" and "value:"
+ * labels are each followed by a line break as well.
+ */
+static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
+			      const char *value)
+{
+	int value_size = strlen(value);
+	bool single_line, break_names;
+
+	break_names = info->key_size > 16 || value_size > 16;
+	single_line = info->key_size + value_size <= 24 && !break_names;
+
+	printf("key:%c", break_names ? '\n' : ' ');
+	fprint_hex(stdout, key, info->key_size, " ");
+
+	/* The separator is plain data, so don't hand it to printf as a format */
+	fputs(single_line ? "  " : "\n", stdout);
+
+	printf("value:%c%s\n", break_names ? '\n' : ' ', value);
+}
+
 static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 			      unsigned char *value)
 {
@@ -658,6 +689,61 @@ static int do_show(int argc, char **argv)
 	return errno == ENOENT ? 0 : -1;
 }
 
+/* Look up @key in the map behind @fd and print the resulting entry.
+ *
+ * On success the entry is printed as JSON, through BTF, or as plain hex,
+ * depending on json_output and @btf.  When the lookup fails the key is
+ * printed with the strerror() text instead of a value, except for maps of
+ * maps and maps of programs, where the failure is skipped silently.
+ *
+ * Returns the number of entries successfully dumped (1 or 0), for the caller
+ * to accumulate.
+ */
+static int dump_map_elem(int fd, void *key, void *value,
+			 struct bpf_map_info *map_info, struct btf *btf,
+			 json_writer_t *btf_wtr)
+{
+	int lookup_errno;
+
+	if (!bpf_map_lookup_elem(fd, key, value)) {
+		if (json_output) {
+			print_entry_json(map_info, key, value, btf);
+		} else if (btf) {
+			struct btf_dumper d = {
+				.btf = btf,
+				.jw = btf_wtr,
+				.is_plain_text = true,
+			};
+
+			do_dump_btf(&d, map_info, key, value);
+		} else {
+			print_entry_plain(map_info, key, value);
+		}
+		/* Count the entry whatever the output format was */
+		return 1;
+	}
+
+	/* lookup error handling */
+	lookup_errno = errno;
+
+	if (map_is_map_of_maps(map_info->type) ||
+	    map_is_map_of_progs(map_info->type))
+		return 0;
+
+	if (json_output) {
+		jsonw_name(json_wtr, "key");
+		print_hex_data_json(key, map_info->key_size);
+		jsonw_name(json_wtr, "value");
+		jsonw_start_object(json_wtr);
+		jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
+		jsonw_end_object(json_wtr);
+	} else {
+		print_entry_error(map_info, key, strerror(lookup_errno));
+	}
+
+	return 0;
+}
+
 static int do_dump(int argc, char **argv)
 {
 	struct bpf_map_info info = {};
@@ -713,40 +792,7 @@ static int do_dump(int argc, char **argv)
 				err = 0;
 			break;
 		}
-
-		if (!bpf_map_lookup_elem(fd, key, value)) {
-			if (json_output)
-				print_entry_json(&info, key, value, btf);
-			else
-				if (btf) {
-					struct btf_dumper d = {
-						.btf = btf,
-						.jw = btf_wtr,
-						.is_plain_text = true,
-					};
-
-					do_dump_btf(&d, &info, key, value);
-				} else {
-					print_entry_plain(&info, key, value);
-				}
-			num_elems++;
-		} else if (!map_is_map_of_maps(info.type) &&
-			   !map_is_map_of_progs(info.type)) {
-			if (json_output) {
-				jsonw_name(json_wtr, "key");
-				print_hex_data_json(key, info.key_size);
-				jsonw_name(json_wtr, "value");
-				jsonw_start_object(json_wtr);
-				jsonw_string_field(json_wtr, "error",
-						   "can't lookup element");
-				jsonw_end_object(json_wtr);
-			} else {
-				p_info("can't lookup element with key: ");
-				fprint_hex(stderr, key, info.key_size, " ");
-				fprintf(stderr, "\n");
-			}
-		}
-
+		num_elems += dump_map_elem(fd, key, value, &info, btf, btf_wtr);
 		prev_key = key;
 	}
 
@@ -1024,6 +1070,105 @@ static int do_pin(int argc, char **argv)
 	return err;
 }
 
+/* Handle "map create": build a map from the keyword/value pairs on the
+ * command line and pin it at the path given as first argument.
+ *
+ * A keyword whose attribute is already non-zero is rejected as a duplicate;
+ * name is mandatory.  Returns 0 on success, or a non-zero value once the
+ * error has been reported.
+ */
+static int do_create(int argc, char **argv)
+{
+	struct bpf_create_map_attr attr = { NULL, };
+	const char *pinfile;
+	int err, fd;
+
+	if (!REQ_ARGS(7))
+		return -1;
+	pinfile = GET_ARG();
+
+	while (argc) {
+		if (!REQ_ARGS(2))
+			return -1;
+
+		if (is_prefix(*argv, "type")) {
+			NEXT_ARG();
+
+			if (attr.map_type) {
+				p_err("map type already specified");
+				return -1;
+			}
+
+			attr.map_type = map_type_from_str(*argv);
+			if ((int)attr.map_type < 0) {
+				p_err("unrecognized map type: %s", *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else if (is_prefix(*argv, "name")) {
+			NEXT_ARG();
+			attr.name = GET_ARG();
+		} else if (is_prefix(*argv, "key")) {
+			if (parse_u32_arg(&argc, &argv, &attr.key_size,
+					  "key size"))
+				return -1;
+		} else if (is_prefix(*argv, "value")) {
+			if (parse_u32_arg(&argc, &argv, &attr.value_size,
+					  "value size"))
+				return -1;
+		} else if (is_prefix(*argv, "entries")) {
+			if (parse_u32_arg(&argc, &argv, &attr.max_entries,
+					  "max entries"))
+				return -1;
+		} else if (is_prefix(*argv, "flags")) {
+			if (parse_u32_arg(&argc, &argv, &attr.map_flags,
+					  "flags"))
+				return -1;
+		} else if (is_prefix(*argv, "dev")) {
+			NEXT_ARG();
+
+			if (attr.map_ifindex) {
+				p_err("offload device already specified");
+				return -1;
+			}
+
+			attr.map_ifindex = if_nametoindex(*argv);
+			if (!attr.map_ifindex) {
+				p_err("unrecognized netdevice '%s': %s",
+				      *argv, strerror(errno));
+				return -1;
+			}
+			NEXT_ARG();
+		} else {
+			/* No branch consumed the word; without this the loop
+			 * would never terminate.
+			 */
+			p_err("unknown arg %s", *argv);
+			return -1;
+		}
+	}
+
+	if (!attr.name) {
+		p_err("map name not specified");
+		return -1;
+	}
+
+	fd = bpf_create_map_xattr(&attr);
+	if (fd < 0) {
+		p_err("map create failed: %s", strerror(errno));
+		return -1;
+	}
+
+	/* The pin keeps the map alive, so our descriptor can go either way */
+	err = do_pin_fd(fd, pinfile);
+	close(fd);
+	if (err)
+		return err;
+
+	if (json_output)
+		jsonw_null(json_wtr);
+	return 0;
+}
+
 static int do_help(int argc, char **argv)
 {
 	if (json_output) {
@@ -1033,6 +1165,9 @@ static int do_help(int argc, char **argv)
 
 	fprintf(stderr,
 		"Usage: %s %s { show | list }   [MAP]\n"
+		"       %s %s create     FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
+		"                              entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
+		"                              [dev NAME]\n"
 		"       %s %s dump       MAP\n"
 		"       %s %s update     MAP  key DATA value VALUE [UPDATE_FLAGS]\n"
 		"       %s %s lookup     MAP  key DATA\n"
@@ -1047,11 +1182,17 @@ static int do_help(int argc, char **argv)
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       VALUE := { DATA | MAP | PROG }\n"
 		"       UPDATE_FLAGS := { any | exist | noexist }\n"
+		"       TYPE := { hash | array | prog_array | perf_event_array | percpu_hash |\n"
+		"                 percpu_array | stack_trace | cgroup_array | lru_hash |\n"
+		"                 lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
+		"                 devmap | sockmap | cpumap | xskmap | sockhash |\n"
+		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2]);
 
 	return 0;
 }
@@ -1067,6 +1208,7 @@ static const struct cmd cmds[] = {
 	{ "delete",	do_delete },
 	{ "pin",	do_pin },
 	{ "event_pipe",	do_event_pipe },
+	{ "create",	do_create },
 	{ 0 }
 };
 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index b1cd3bc8db70..335028968dfb 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -77,6 +77,35 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_FLOW_DISSECTOR]	= "flow_dissector",
 };
 
+/* Keywords accepted as ATTACH_TYPE by "prog attach"/"prog detach".  They must
+ * stay in sync with the usage text and the bash completion.
+ */
+static const char * const attach_type_strings[] = {
+	[BPF_SK_SKB_STREAM_PARSER] = "skb_parse",
+	[BPF_SK_SKB_STREAM_VERDICT] = "skb_verdict",
+	[BPF_SK_MSG_VERDICT] = "msg_verdict",
+	/* Sizes the array so every enum value below the limit is a valid index */
+	[__MAX_BPF_ATTACH_TYPE] = NULL,
+};
+
+/* Return the attach type named by @str, or __MAX_BPF_ATTACH_TYPE when no
+ * table entry matches.  Entries are tried in enum order and the first one
+ * accepted by is_prefix() wins.
+ */
+enum bpf_attach_type parse_attach_type(const char *str)
+{
+	enum bpf_attach_type type;
+
+	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+		const char *name = attach_type_strings[type];
+
+		if (name && is_prefix(str, name))
+			return type;
+	}
+
+	return __MAX_BPF_ATTACH_TYPE;
+}
+
 static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
 {
 	struct timespec real_time_ts, boot_time_ts;
@@ -697,6 +717,90 @@ int map_replace_compar(const void *p1, const void *p2)
 	return a->idx - b->idx;
 }
 
+/* Parse the "PROG ATTACH_TYPE MAP" arguments shared by attach and detach.
+ *
+ * On success returns 0 with both descriptors open; the caller must close
+ * them.  On failure returns a negative value and leaves nothing open.
+ */
+static int parse_attach_detach_args(int argc, char **argv, int *progfd,
+				    enum bpf_attach_type *attach_type,
+				    int *mapfd)
+{
+	/* NOTE(review): assumes REQ_ARGS() reports the shortfall itself */
+	if (!REQ_ARGS(5))
+		return -EINVAL;
+
+	*progfd = prog_parse_fd(&argc, &argv);
+	if (*progfd < 0)
+		return *progfd;
+
+	*attach_type = parse_attach_type(*argv);
+	if (*attach_type == __MAX_BPF_ATTACH_TYPE) {
+		p_err("invalid attach type");
+		close(*progfd);
+		return -EINVAL;
+	}
+	NEXT_ARG();
+
+	*mapfd = map_parse_fd(&argc, &argv);
+	if (*mapfd < 0) {
+		close(*progfd);
+		return *mapfd;
+	}
+
+	return 0;
+}
+
+/* Handle "prog attach PROG ATTACH_TYPE MAP"; returns 0 or a negative value. */
+static int do_attach(int argc, char **argv)
+{
+	enum bpf_attach_type attach_type;
+	int err, mapfd, progfd;
+
+	err = parse_attach_detach_args(argc, argv,
+				       &progfd, &attach_type, &mapfd);
+	if (err)
+		return err;
+
+	err = bpf_prog_attach(progfd, mapfd, attach_type, 0);
+	/* The descriptors are not used past this call, so don't leak them */
+	close(mapfd);
+	close(progfd);
+	if (err) {
+		p_err("failed prog attach to map");
+		return -EINVAL;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+	return 0;
+}
+
+/* Handle "prog detach PROG ATTACH_TYPE MAP"; returns 0 or a negative value. */
+static int do_detach(int argc, char **argv)
+{
+	enum bpf_attach_type attach_type;
+	int err, mapfd, progfd;
+
+	err = parse_attach_detach_args(argc, argv,
+				       &progfd, &attach_type, &mapfd);
+	if (err)
+		return err;
+
+	err = bpf_prog_detach2(progfd, mapfd, attach_type);
+	/* The descriptors are not used past this call, so don't leak them */
+	close(mapfd);
+	close(progfd);
+	if (err) {
+		p_err("failed prog detach from map");
+		return -EINVAL;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+	return 0;
+}
+
 static int do_load(int argc, char **argv)
 {
 	enum bpf_attach_type expected_attach_type;
@@ -817,7 +908,7 @@ static int do_load(int argc, char **argv)
 		}
 	}
 
-	obj = bpf_object__open_xattr(&attr);
+	obj = __bpf_object__open_xattr(&attr, bpf_flags);
 	if (IS_ERR_OR_NULL(obj)) {
 		p_err("failed to open object file");
 		goto err_free_reuse_maps;
@@ -942,6 +1033,8 @@ static int do_help(int argc, char **argv)
 		"       %s %s pin   PROG FILE\n"
 		"       %s %s load  OBJ  FILE [type TYPE] [dev NAME] \\\n"
 		"                         [map { idx IDX | name NAME } MAP]\n"
+		"       %s %s attach PROG ATTACH_TYPE MAP\n"
+		"       %s %s detach PROG ATTACH_TYPE MAP\n"
 		"       %s %s help\n"
 		"\n"
 		"       " HELP_SPEC_MAP "\n"
@@ -953,10 +1046,12 @@ static int do_help(int argc, char **argv)
 		"                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
 		"                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
 		"                 cgroup/sendmsg4 | cgroup/sendmsg6 }\n"
+		"       ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2], bin_name, argv[-2]);
 
 	return 0;
 }
@@ -968,6 +1063,8 @@ static const struct cmd cmds[] = {
 	{ "dump",	do_dump },
 	{ "pin",	do_pin },
 	{ "load",	do_load },
+	{ "attach",	do_attach },
+	{ "detach",	do_detach },
 	{ 0 }
 };
 
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 6ad27257fd67..79d84413ddf2 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -69,7 +69,7 @@ FEATURE_USER = .libbpf
 FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
 FEATURE_DISPLAY = libelf bpf
 
-INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf
+INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
 FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
 
 check_feat := 1
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 87520a87a75f..69a4d40c4227 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -69,6 +69,9 @@ struct bpf_load_program_attr {
 	__u32 prog_ifindex;
 };
 
+/* Flags to direct loading requirements */
+#define MAPS_RELAX_COMPAT	0x01
+
 /* Recommend log buffer size */
 #define BPF_LOG_BUF_SIZE (256 * 1024)
 int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ceb918c14d80..bd71efcc53be 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -19,7 +19,6 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <errno.h>
-#include <perf-sys.h>
 #include <asm/unistd.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
@@ -27,6 +26,7 @@
 #include <linux/btf.h>
 #include <linux/list.h>
 #include <linux/limits.h>
+#include <linux/perf_event.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
@@ -169,7 +169,7 @@ static LIST_HEAD(bpf_objects_list);
 
 struct bpf_object {
 	char license[64];
-	u32 kern_version;
+	__u32 kern_version;
 
 	struct bpf_program *programs;
 	size_t nr_programs;
@@ -540,7 +540,7 @@ static int
 bpf_object__init_kversion(struct bpf_object *obj,
 			  void *data, size_t size)
 {
-	u32 kver;
+	__u32 kver;
 
 	if (size != sizeof(kver)) {
 		pr_warning("invalid kver section in %s\n", obj->path);
@@ -562,8 +562,9 @@ static int compare_bpf_map(const void *_a, const void *_b)
 }
 
 static int
-bpf_object__init_maps(struct bpf_object *obj)
+bpf_object__init_maps(struct bpf_object *obj, int flags)
 {
+	bool strict = !(flags & MAPS_RELAX_COMPAT);
 	int i, map_idx, map_def_sz, nr_maps = 0;
 	Elf_Scn *scn;
 	Elf_Data *data;
@@ -685,7 +686,8 @@ bpf_object__init_maps(struct bpf_object *obj)
 						   "has unrecognized, non-zero "
 						   "options\n",
 						   obj->path, map_name);
-					return -EINVAL;
+					if (strict)
+						return -EINVAL;
 				}
 			}
 			memcpy(&obj->maps[map_idx].def, def,
@@ -716,7 +718,7 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
 	return false;
 }
 
-static int bpf_object__elf_collect(struct bpf_object *obj)
+static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 {
 	Elf *elf = obj->efile.elf;
 	GElf_Ehdr *ep = &obj->efile.ehdr;
@@ -843,7 +845,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 		return LIBBPF_ERRNO__FORMAT;
 	}
 	if (obj->efile.maps_shndx >= 0) {
-		err = bpf_object__init_maps(obj);
+		err = bpf_object__init_maps(obj, flags);
 		if (err)
 			goto out;
 	}
@@ -1295,7 +1297,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 static int
 load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
 	     const char *name, struct bpf_insn *insns, int insns_cnt,
-	     char *license, u32 kern_version, int *pfd, int prog_ifindex)
+	     char *license, __u32 kern_version, int *pfd, int prog_ifindex)
 {
 	struct bpf_load_program_attr load_attr;
 	char *cp, errmsg[STRERR_BUFSIZE];
@@ -1515,7 +1517,7 @@ static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
 
 static struct bpf_object *
 __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
-		   bool needs_kver)
+		   bool needs_kver, int flags)
 {
 	struct bpf_object *obj;
 	int err;
@@ -1531,7 +1533,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
 
 	CHECK_ERR(bpf_object__elf_init(obj), err, out);
 	CHECK_ERR(bpf_object__check_endianness(obj), err, out);
-	CHECK_ERR(bpf_object__elf_collect(obj), err, out);
+	CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out);
 	CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
 	CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
 
@@ -1542,7 +1544,8 @@ out:
 	return ERR_PTR(err);
 }
 
-struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
+struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
+					    int flags)
 {
 	/* param validation */
 	if (!attr->file)
@@ -1551,7 +1554,13 @@ struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
 	pr_debug("loading %s\n", attr->file);
 
 	return __bpf_object__open(attr->file, NULL, 0,
-				  bpf_prog_type__needs_kver(attr->prog_type));
+				  bpf_prog_type__needs_kver(attr->prog_type),
+				  flags);
+}
+
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
+{
+	return __bpf_object__open_xattr(attr, 0);
 }
 
 struct bpf_object *bpf_object__open(const char *path)
@@ -1584,7 +1593,7 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
 	pr_debug("loading object '%s' from buffer\n",
 		 name);
 
-	return __bpf_object__open(name, obj_buf, obj_buf_sz, true);
+	return __bpf_object__open(name, obj_buf, obj_buf_sz, true, 0);
 }
 
 int bpf_object__unload(struct bpf_object *obj)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 8af8d3663991..7e9c801a9fdd 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -61,6 +61,8 @@ struct bpf_object_open_attr {
 
 struct bpf_object *bpf_object__open(const char *path);
 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr);
+struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
+					    int flags);
 struct bpf_object *bpf_object__open_buffer(void *obj_buf,
 					   size_t obj_buf_sz,
 					   const char *name);
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1381ab81099c..d99dd6fc3fbe 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -36,7 +36,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
 	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
 	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_sk_lookup_kern.o
+	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
+	test_sk_lookup_kern.o test_xdp_vlan.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -49,7 +50,10 @@ TEST_PROGS := test_kmod.sh \
 	test_lwt_seg6local.sh \
 	test_lirc_mode2.sh \
 	test_skb_cgroup_id.sh \
-	test_flow_dissector.sh
+	test_flow_dissector.sh \
+	test_xdp_vlan.sh
+
+TEST_PROGS_EXTENDED := with_addr.sh
 
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 1d407b3494f9..fda8c162d0df 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -155,6 +155,10 @@ static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
 	(void *) BPF_FUNC_sk_lookup_udp;
 static int (*bpf_sk_release)(struct bpf_sock *sk) =
 	(void *) BPF_FUNC_sk_release;
+static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) =
+	(void *) BPF_FUNC_skb_vlan_push;
+static int (*bpf_skb_vlan_pop)(void *ctx) =
+	(void *) BPF_FUNC_skb_vlan_pop;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 3655508f95fd..dd49df5e2df4 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -19,3 +19,4 @@ CONFIG_CRYPTO_SHA256=m
 CONFIG_VXLAN=y
 CONFIG_GENEVE=y
 CONFIG_NET_CLS_FLOWER=m
+CONFIG_LWTUNNEL=y
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index ac7de38e5c63..10a5fa83c75a 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -71,6 +71,7 @@ int txmsg_start;
 int txmsg_end;
 int txmsg_ingress;
 int txmsg_skb;
+int ktls;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -92,6 +93,7 @@ static const struct option long_options[] = {
 	{"txmsg_end",	required_argument,	NULL, 'e'},
 	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
 	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
+	{"ktls", no_argument,			&ktls, 1 },
 	{0, 0, NULL, 0 }
 };
 
@@ -112,6 +114,92 @@ static void usage(char *argv[])
 	printf("\n");
 }
 
+#define TCP_ULP 31
+#define TLS_TX 1
+#define TLS_RX 2
+#include <linux/tls.h>
+
+/* Name the test socket that @s refers to, for use in log messages.
+ * Descriptors other than c1, c2, s1, s2, p1 and p2 yield "unknown".
+ */
+char *sock_to_string(int s)
+{
+	if (s == c1)
+		return "client1";
+	if (s == c2)
+		return "client2";
+	if (s == s1)
+		return "server1";
+	if (s == s2)
+		return "server2";
+	if (s == p1)
+		return "peer1";
+	if (s == p2)
+		return "peer2";
+	return "unknown";
+}
+
+/* Turn on kernel TLS for socket @s.
+ *
+ * Attaches the "tls" ULP, installs TLS 1.2 / AES-GCM-128 crypto info for TX
+ * and RX (only version and cipher type are set) and requests larger send and
+ * receive buffers.  Returns 0 on success, -EINVAL if any setsockopt() fails.
+ */
+static int sockmap_init_ktls(int verbose, int s)
+{
+	struct tls12_crypto_info_aes_gcm_128 tls_tx = {
+		.info = {
+			.version     = TLS_1_2_VERSION,
+			.cipher_type = TLS_CIPHER_AES_GCM_128,
+		},
+	};
+	struct tls12_crypto_info_aes_gcm_128 tls_rx = {
+		.info = {
+			.version     = TLS_1_2_VERSION,
+			.cipher_type = TLS_CIPHER_AES_GCM_128,
+		},
+	};
+	int so_buf = 6553500;
+	int err;
+
+	/* Level 6 is the TCP protocol number */
+	err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls"));
+	if (err) {
+		/* setsockopt() only ever returns -1; errno holds the reason */
+		fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n",
+			sock_to_string(s), errno);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx));
+	if (err) {
+		fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n",
+			sock_to_string(s), errno);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx));
+	if (err) {
+		fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n",
+			sock_to_string(s), errno);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf));
+	if (err) {
+		fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n",
+			sock_to_string(s), errno);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf));
+	if (err) {
+		fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n",
+			sock_to_string(s), errno);
+		return -EINVAL;
+	}
+
+	if (verbose)
+		fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s));
+	return 0;
+}
+
 static int sockmap_init_sockets(int verbose)
 {
 	int i, err, one = 1;
@@ -456,6 +528,21 @@ static int sendmsg_test(struct sockmap_options *opt)
 	else
 		rx_fd = p2;
 
+	if (ktls) {
+		/* Redirecting into non-TLS socket which sends into a TLS
+		 * socket is not a valid test. So in this case lets not
+		 * enable kTLS but still run the test.
+		 */
+		if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
+			err = sockmap_init_ktls(opt->verbose, rx_fd);
+			if (err)
+				return err;
+		}
+		err = sockmap_init_ktls(opt->verbose, c1);
+		if (err)
+			return err;
+	}
+
 	rxpid = fork();
 	if (rxpid == 0) {
 		if (opt->drop_expected)
@@ -907,6 +994,8 @@ static void test_options(char *options)
 		strncat(options, "ingress,", OPTSTRING);
 	if (txmsg_skb)
 		strncat(options, "skb,", OPTSTRING);
+	if (ktls)
+		strncat(options, "ktls,", OPTSTRING);
 }
 
 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index bc9cd8537467..cf4cd32b6772 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -48,7 +48,7 @@
 
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	8
+#define MAX_NR_MAPS	13
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
 
@@ -61,10 +61,14 @@ static bool unpriv_disabled = false;
 struct bpf_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
-	int fixup_map1[MAX_FIXUPS];
-	int fixup_map2[MAX_FIXUPS];
-	int fixup_map3[MAX_FIXUPS];
-	int fixup_map4[MAX_FIXUPS];
+	int fixup_map_hash_8b[MAX_FIXUPS];
+	int fixup_map_hash_48b[MAX_FIXUPS];
+	int fixup_map_hash_16b[MAX_FIXUPS];
+	int fixup_map_array_48b[MAX_FIXUPS];
+	int fixup_map_sockmap[MAX_FIXUPS];
+	int fixup_map_sockhash[MAX_FIXUPS];
+	int fixup_map_xskmap[MAX_FIXUPS];
+	int fixup_map_stacktrace[MAX_FIXUPS];
 	int fixup_prog1[MAX_FIXUPS];
 	int fixup_prog2[MAX_FIXUPS];
 	int fixup_map_in_map[MAX_FIXUPS];
@@ -876,7 +880,7 @@ static struct bpf_test tests[] = {
 				     BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 2 },
+		.fixup_map_hash_8b = { 2 },
 		.errstr = "invalid indirect read from stack",
 		.result = REJECT,
 	},
@@ -1110,7 +1114,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 invalid mem access 'map_value_or_null'",
 		.result = REJECT,
 	},
@@ -1127,7 +1131,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "misaligned value access",
 		.result = REJECT,
 		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
@@ -1147,7 +1151,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 invalid mem access",
 		.errstr_unpriv = "R0 leaks addr",
 		.result = REJECT,
@@ -1237,7 +1241,7 @@ static struct bpf_test tests[] = {
 				     BPF_FUNC_map_delete_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 24 },
+		.fixup_map_hash_8b = { 24 },
 		.errstr_unpriv = "R1 pointer comparison",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -1391,7 +1395,7 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, pkt_type)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "different pointers",
 		.errstr_unpriv = "R1 pointer comparison",
 		.result = REJECT,
@@ -1414,7 +1418,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 			BPF_JMP_IMM(BPF_JA, 0, 0, -12),
 		},
-		.fixup_map1 = { 6 },
+		.fixup_map_hash_8b = { 6 },
 		.errstr = "different pointers",
 		.errstr_unpriv = "R1 pointer comparison",
 		.result = REJECT,
@@ -1438,7 +1442,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 			BPF_JMP_IMM(BPF_JA, 0, 0, -13),
 		},
-		.fixup_map1 = { 7 },
+		.fixup_map_hash_8b = { 7 },
 		.errstr = "different pointers",
 		.errstr_unpriv = "R1 pointer comparison",
 		.result = REJECT,
@@ -2575,7 +2579,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr_unpriv = "R4 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -2592,7 +2596,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "invalid indirect read from stack off -8+0 size 8",
 		.result = REJECT,
 	},
@@ -2894,7 +2898,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -2934,7 +2938,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.errstr_unpriv = "R1 pointer comparison",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4073,7 +4077,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 5 },
+		.fixup_map_hash_8b = { 5 },
 		.result_unpriv = ACCEPT,
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -4089,7 +4093,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -4117,7 +4121,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 11 },
+		.fixup_map_hash_8b = { 11 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -4139,7 +4143,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 7 },
+		.fixup_map_hash_8b = { 7 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -4161,7 +4165,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 6 },
+		.fixup_map_hash_8b = { 6 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -4184,7 +4188,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 5 },
+		.fixup_map_hash_8b = { 5 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -4199,7 +4203,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -4227,7 +4231,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 11 },
+		.fixup_map_hash_8b = { 11 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -4249,7 +4253,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 7 },
+		.fixup_map_hash_8b = { 7 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -4271,7 +4275,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 6 },
+		.fixup_map_hash_8b = { 6 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -4542,6 +4546,85 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
+		"prevent map lookup in sockmap",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_sockmap = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+	},
+	{
+		"prevent map lookup in sockhash",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_sockhash = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+	},
+	{
+		"prevent map lookup in xskmap",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_xskmap = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"prevent map lookup in stack trace",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_stacktrace = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 7 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+	},
+	{
+		"prevent map lookup in prog array",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog2 = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 3 into func bpf_map_lookup_elem",
+	},
+	{
 		"valid map access into an array with a constant",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@ -4555,7 +4638,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4577,7 +4660,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4601,7 +4684,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4629,7 +4712,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4649,7 +4732,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=48 size=8",
 		.result = REJECT,
 	},
@@ -4670,7 +4753,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 min value is outside of the array range",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4692,7 +4775,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4717,7 +4800,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.errstr = "R0 unbounded memory access",
 		.result_unpriv = REJECT,
@@ -4744,7 +4827,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.errstr = "invalid access to map value, value_size=48 off=44 size=8",
 		.result_unpriv = REJECT,
@@ -4774,7 +4857,7 @@ static struct bpf_test tests[] = {
 				    offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3, 11 },
+		.fixup_map_hash_48b = { 3, 11 },
 		.errstr = "R0 pointer += pointer",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4807,7 +4890,7 @@ static struct bpf_test tests[] = {
 			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.result = REJECT,
 		.errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
@@ -4922,7 +5005,7 @@ static struct bpf_test tests[] = {
 			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.result = REJECT,
 		.errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
@@ -5024,7 +5107,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -5045,7 +5128,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "R4 pointer arithmetic on map_value_or_null",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
@@ -5066,7 +5149,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "R4 pointer arithmetic on map_value_or_null",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
@@ -5087,7 +5170,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "R4 pointer arithmetic on map_value_or_null",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
@@ -5113,7 +5196,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.result = REJECT,
 		.errstr = "R4 !read_ok",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
@@ -5141,7 +5224,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -5162,7 +5245,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT,
 		.errstr_unpriv = "R0 leaks addr",
@@ -5412,7 +5495,7 @@ static struct bpf_test tests[] = {
 				      offsetof(struct __sk_buff, cb[0])),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 2 },
+		.fixup_map_hash_8b = { 2 },
 		.errstr_unpriv = "R2 leaks addr into mem",
 		.result_unpriv = REJECT,
 		.result = REJECT,
@@ -5442,7 +5525,7 @@ static struct bpf_test tests[] = {
 				      offsetof(struct __sk_buff, cb[0])),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.errstr_unpriv = "R2 leaks addr into ctx",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -5464,7 +5547,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr_unpriv = "R6 leaks addr into mem",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -5484,7 +5567,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5503,7 +5586,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5521,7 +5604,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=0 size=0",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5541,7 +5624,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=0 size=56",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5561,7 +5644,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5585,7 +5668,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5606,7 +5689,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5626,7 +5709,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=0",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5650,7 +5733,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=52",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5672,7 +5755,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5694,7 +5777,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5719,7 +5802,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5741,7 +5824,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5761,7 +5844,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 min value is outside of the array range",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5786,7 +5869,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=52",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5809,7 +5892,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5832,7 +5915,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5858,7 +5941,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5881,7 +5964,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5903,7 +5986,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 min value is outside of the array range",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5925,7 +6008,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 unbounded memory access",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5951,7 +6034,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=45",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5975,7 +6058,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5998,7 +6081,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 unbounded memory access",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6022,7 +6105,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6045,7 +6128,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 unbounded memory access",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6070,7 +6153,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6094,7 +6177,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6118,7 +6201,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 min value is negative",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6143,7 +6226,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6167,7 +6250,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6191,7 +6274,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 min value is negative",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6210,7 +6293,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 8 },
+		.fixup_map_hash_16b = { 3, 8 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6230,7 +6313,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 10 },
+		.fixup_map_hash_16b = { 3, 10 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6250,8 +6333,8 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
-		.fixup_map3 = { 10 },
+		.fixup_map_hash_8b = { 3 },
+		.fixup_map_hash_16b = { 10 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=8 off=0 size=16",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6272,7 +6355,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 9 },
+		.fixup_map_hash_16b = { 3, 9 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6292,7 +6375,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 9 },
+		.fixup_map_hash_16b = { 3, 9 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=16 off=12 size=8",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6312,7 +6395,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 9 },
+		.fixup_map_hash_16b = { 3, 9 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6334,7 +6417,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 10 },
+		.fixup_map_hash_16b = { 3, 10 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6355,7 +6438,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 10 },
+		.fixup_map_hash_16b = { 3, 10 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=16 off=12 size=8",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6376,7 +6459,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 10 },
+		.fixup_map_hash_16b = { 3, 10 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6399,7 +6482,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 11 },
+		.fixup_map_hash_16b = { 3, 11 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6419,7 +6502,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 10 },
+		.fixup_map_hash_16b = { 3, 10 },
 		.result = REJECT,
 		.errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6442,7 +6525,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 11 },
+		.fixup_map_hash_16b = { 3, 11 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=16 off=9 size=8",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6464,7 +6547,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6485,7 +6568,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6502,7 +6585,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R1 !read_ok",
 		.errstr = "R1 !read_ok",
 		.result = REJECT,
@@ -6536,7 +6619,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6564,7 +6647,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6583,7 +6666,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 bitwise operator &= on pointer",
 		.result = REJECT,
 	},
@@ -6600,7 +6683,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 32-bit pointer arithmetic prohibited",
 		.result = REJECT,
 	},
@@ -6617,7 +6700,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 pointer arithmetic with /= operator",
 		.result = REJECT,
 	},
@@ -6634,7 +6717,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "invalid mem access 'inv'",
 		.result = REJECT,
@@ -6658,7 +6741,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 invalid mem access 'inv'",
 		.result = REJECT,
 	},
@@ -6681,7 +6764,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6927,7 +7010,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6953,7 +7036,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=0 size=49",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6981,7 +7064,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -7008,7 +7091,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 min value is outside of the array range",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -7080,7 +7163,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -7105,7 +7188,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -7128,7 +7211,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -7209,7 +7292,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -7230,7 +7313,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -7250,7 +7333,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -7325,7 +7408,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -7355,7 +7438,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -7708,7 +7791,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7732,7 +7815,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7758,7 +7841,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7783,7 +7866,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7807,7 +7890,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 	},
 	{
@@ -7831,7 +7914,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7877,7 +7960,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 	},
 	{
@@ -7902,7 +7985,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7928,7 +8011,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 	},
 	{
@@ -7953,7 +8036,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7980,7 +8063,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -8006,7 +8089,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -8035,7 +8118,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -8065,7 +8148,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
 			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "R0 invalid mem access 'inv'",
 		.result = REJECT,
 	},
@@ -8093,7 +8176,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 		.result_unpriv = REJECT,
@@ -8120,7 +8203,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
 	},
@@ -8145,7 +8228,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result = REJECT,
 	},
@@ -8171,7 +8254,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT
 	},
 	{
@@ -8196,7 +8279,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "map_value pointer and 4294967295",
 		.result = REJECT
 	},
@@ -8222,7 +8305,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 min value is outside of the array range",
 		.result = REJECT
 	},
@@ -8246,7 +8329,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "value_size=8 off=1073741825",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -8271,7 +8354,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "value 1073741823",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -8307,7 +8390,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT
 	},
 	{
@@ -8346,7 +8429,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		/* not actually fully unbounded, but the bound is very high */
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT
@@ -8389,7 +8472,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		/* not actually fully unbounded, but the bound is very high */
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT
@@ -8418,7 +8501,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT
 	},
 	{
@@ -8445,7 +8528,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT
 	},
@@ -8475,7 +8558,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT
 	},
@@ -8495,7 +8578,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "map_value pointer and 2147483646",
 		.result = REJECT
 	},
@@ -8517,7 +8600,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "pointer offset 1073741822",
 		.result = REJECT
 	},
@@ -8538,7 +8621,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "pointer offset -1073741822",
 		.result = REJECT
 	},
@@ -8560,7 +8643,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "map_value pointer and 1000000000000",
 		.result = REJECT
 	},
@@ -8580,7 +8663,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.retval = POINTER_VALUE,
 		.result_unpriv = REJECT,
@@ -8601,7 +8684,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.retval = POINTER_VALUE,
 		.result_unpriv = REJECT,
@@ -8669,7 +8752,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 5 },
+		.fixup_map_hash_8b = { 5 },
 		.errstr = "variable stack read R2",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_LWT_IN,
@@ -8750,7 +8833,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.errstr = "R0 unbounded memory access",
 		.result_unpriv = REJECT,
@@ -10284,7 +10367,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 16 },
+		.fixup_map_hash_8b = { 16 },
 		.result = REJECT,
 		.errstr = "R0 min value is outside of the array range",
 	},
@@ -11235,7 +11318,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(), /* return 0 */
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
-		.fixup_map1 = { 23 },
+		.fixup_map_hash_8b = { 23 },
 		.result = ACCEPT,
 	},
 	{
@@ -11290,7 +11373,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(), /* return 1 */
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
-		.fixup_map1 = { 23 },
+		.fixup_map_hash_8b = { 23 },
 		.result = ACCEPT,
 	},
 	{
@@ -11345,7 +11428,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(), /* return 1 */
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
-		.fixup_map1 = { 23 },
+		.fixup_map_hash_8b = { 23 },
 		.result = REJECT,
 		.errstr = "invalid read from stack off -16+0 size 8",
 	},
@@ -11417,7 +11500,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
 	},
@@ -11489,7 +11572,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = ACCEPT,
 	},
 	{
@@ -11560,7 +11643,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JA, 0, 0, -8),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
 	},
@@ -11632,7 +11715,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = ACCEPT,
 	},
 	{
@@ -11703,7 +11786,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = REJECT,
 		.errstr = "R0 invalid mem access 'inv'",
 	},
@@ -12048,7 +12131,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 13 },
+		.fixup_map_hash_8b = { 13 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -12075,7 +12158,7 @@ static struct bpf_test tests[] = {
 				     BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 6 },
+		.fixup_map_hash_48b = { 6 },
 		.errstr = "invalid indirect read from stack off -8+0 size 8",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -12107,8 +12190,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map2 = { 13 },
-		.fixup_map4 = { 16 },
+		.fixup_map_hash_48b = { 13 },
+		.fixup_map_array_48b = { 16 },
 		.result = ACCEPT,
 		.retval = 1,
 	},
@@ -12140,7 +12223,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.fixup_map_in_map = { 16 },
-		.fixup_map4 = { 13 },
+		.fixup_map_array_48b = { 13 },
 		.result = REJECT,
 		.errstr = "R0 invalid mem access 'map_ptr'",
 	},
@@ -12208,7 +12291,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R6 invalid mem access 'inv'",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -12232,7 +12315,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "invalid read from stack off -16+0 size 8",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -12354,7 +12437,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = REJECT,
 		.errstr = "misaligned value access off",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -12464,7 +12547,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_get_stack),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 4 },
+		.fixup_map_hash_48b = { 4 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -13511,10 +13594,14 @@ static char bpf_vlog[UINT_MAX >> 8];
 static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
 			  struct bpf_insn *prog, int *map_fds)
 {
-	int *fixup_map1 = test->fixup_map1;
-	int *fixup_map2 = test->fixup_map2;
-	int *fixup_map3 = test->fixup_map3;
-	int *fixup_map4 = test->fixup_map4;
+	int *fixup_map_hash_8b = test->fixup_map_hash_8b;
+	int *fixup_map_hash_48b = test->fixup_map_hash_48b;
+	int *fixup_map_hash_16b = test->fixup_map_hash_16b;
+	int *fixup_map_array_48b = test->fixup_map_array_48b;
+	int *fixup_map_sockmap = test->fixup_map_sockmap;
+	int *fixup_map_sockhash = test->fixup_map_sockhash;
+	int *fixup_map_xskmap = test->fixup_map_xskmap;
+	int *fixup_map_stacktrace = test->fixup_map_stacktrace;
 	int *fixup_prog1 = test->fixup_prog1;
 	int *fixup_prog2 = test->fixup_prog2;
 	int *fixup_map_in_map = test->fixup_map_in_map;
@@ -13528,40 +13615,40 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
 	 * for verifier and not do a runtime lookup, so the only thing
 	 * that really matters is value size in this case.
 	 */
-	if (*fixup_map1) {
+	if (*fixup_map_hash_8b) {
 		map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
 					sizeof(long long), 1);
 		do {
-			prog[*fixup_map1].imm = map_fds[0];
-			fixup_map1++;
-		} while (*fixup_map1);
+			prog[*fixup_map_hash_8b].imm = map_fds[0];
+			fixup_map_hash_8b++;
+		} while (*fixup_map_hash_8b);
 	}
 
-	if (*fixup_map2) {
+	if (*fixup_map_hash_48b) {
 		map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
 					sizeof(struct test_val), 1);
 		do {
-			prog[*fixup_map2].imm = map_fds[1];
-			fixup_map2++;
-		} while (*fixup_map2);
+			prog[*fixup_map_hash_48b].imm = map_fds[1];
+			fixup_map_hash_48b++;
+		} while (*fixup_map_hash_48b);
 	}
 
-	if (*fixup_map3) {
+	if (*fixup_map_hash_16b) {
 		map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
 					sizeof(struct other_val), 1);
 		do {
-			prog[*fixup_map3].imm = map_fds[2];
-			fixup_map3++;
-		} while (*fixup_map3);
+			prog[*fixup_map_hash_16b].imm = map_fds[2];
+			fixup_map_hash_16b++;
+		} while (*fixup_map_hash_16b);
 	}
 
-	if (*fixup_map4) {
+	if (*fixup_map_array_48b) {
 		map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
 					sizeof(struct test_val), 1);
 		do {
-			prog[*fixup_map4].imm = map_fds[3];
-			fixup_map4++;
-		} while (*fixup_map4);
+			prog[*fixup_map_array_48b].imm = map_fds[3];
+			fixup_map_array_48b++;
+		} while (*fixup_map_array_48b);
 	}
 
 	if (*fixup_prog1) {
@@ -13603,6 +13690,38 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
 			fixup_percpu_cgroup_storage++;
 		} while (*fixup_percpu_cgroup_storage);
 	}
+	if (*fixup_map_sockmap) {
+		map_fds[9] = create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(int),
+					sizeof(int), 1);
+		do {
+			prog[*fixup_map_sockmap].imm = map_fds[9];
+			fixup_map_sockmap++;
+		} while (*fixup_map_sockmap);
+	}
+	if (*fixup_map_sockhash) {
+		map_fds[10] = create_map(BPF_MAP_TYPE_SOCKHASH, sizeof(int),
+					sizeof(int), 1);
+		do {
+			prog[*fixup_map_sockhash].imm = map_fds[10];
+			fixup_map_sockhash++;
+		} while (*fixup_map_sockhash);
+	}
+	if (*fixup_map_xskmap) {
+		map_fds[11] = create_map(BPF_MAP_TYPE_XSKMAP, sizeof(int),
+					sizeof(int), 1);
+		do {
+			prog[*fixup_map_xskmap].imm = map_fds[11];
+			fixup_map_xskmap++;
+		} while (*fixup_map_xskmap);
+	}
+	if (*fixup_map_stacktrace) { /* zero-terminated list of insn indices */
+		map_fds[12] = create_map(BPF_MAP_TYPE_STACK_TRACE, sizeof(u32),
+					 sizeof(u64), 1);
+		do {
+			prog[*fixup_map_stacktrace].imm = map_fds[12];
+			fixup_map_stacktrace++;
+		} while (*fixup_map_stacktrace); /* test the entry, not the pointer */
+	}
 }
 
 static void do_test_single(struct bpf_test *test, bool unpriv,
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.c b/tools/testing/selftests/bpf/test_xdp_vlan.c
new file mode 100644
index 000000000000..365a7d2d9f5c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *  Copyright(c) 2018 Jesper Dangaard Brouer.
+ *
+ * XDP/TC VLAN manipulation example
+ *
+ * GOTCHA: Remember to disable NIC hardware offloading of VLANs,
+ * else the VLAN tags are NOT inlined in the packet payload:
+ *
+ *  # ethtool -K ixgbe2 rxvlan off
+ *
+ * Verify setting:
+ *  # ethtool -k ixgbe2 | grep rx-vlan-offload
+ *  rx-vlan-offload: off
+ *
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* linux/if_vlan.h has not exposed this as UAPI, thus mirror some of it here
+ *
+ *	struct vlan_hdr - vlan header
+ *	@h_vlan_TCI: priority and VLAN ID
+ *	@h_vlan_encapsulated_proto: packet type ID or len
+ */
+struct _vlan_hdr {
+	__be16 h_vlan_TCI;
+	__be16 h_vlan_encapsulated_proto;
+};
+#define VLAN_PRIO_MASK		0xe000 /* Priority Code Point */
+#define VLAN_PRIO_SHIFT		13
+#define VLAN_CFI_MASK		0x1000 /* Canonical Format Indicator */
+#define VLAN_TAG_PRESENT	VLAN_CFI_MASK
+#define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
+#define VLAN_N_VID		4096
+
+struct parse_pkt { /* result of parse_eth_frame(); callers zero it first */
+	__u16 l3_proto;		/* ethertype after VLAN tags, host byte order */
+	__u16 l3_offset;	/* bytes from start of frame to the L3 header */
+	__u16 vlan_outer;	/* outer VID (TCI & VLAN_VID_MASK), set if tagged */
+	__u16 vlan_inner;	/* inner VID (TCI & VLAN_VID_MASK), set if tagged */
+	__u8  vlan_outer_offset; /* bytes to outer VLAN hdr, set only if tagged */
+	__u8  vlan_inner_offset; /* bytes to inner VLAN hdr, set only if tagged */
+};
+
+char _license[] SEC("license") = "GPL";
+
+static __always_inline
+bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
+{
+	__u16 eth_type; /* current ethertype, kept in network byte order */
+	__u8 offset; /* bytes parsed so far, counted from eth */
+
+	offset = sizeof(*eth);
+	/* Make sure packet is large enough for parsing eth + 2 VLAN headers */
+	if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end)
+		return false; /* too short; *pkt is left unmodified */
+
+	eth_type = eth->h_proto;
+
+	/* Handle outer VLAN tag (802.1Q or 802.1ad); only the VID is kept */
+	if (eth_type == bpf_htons(ETH_P_8021Q)
+	    || eth_type == bpf_htons(ETH_P_8021AD)) {
+		struct _vlan_hdr *vlan_hdr;
+
+		vlan_hdr = (void *)eth + offset;
+		pkt->vlan_outer_offset = offset;
+		pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI)
+				& VLAN_VID_MASK;
+		eth_type        = vlan_hdr->h_vlan_encapsulated_proto;
+		offset += sizeof(*vlan_hdr);
+	}
+
+	/* Handle inner (double) VLAN tag, same checks on the next ethertype */
+	if (eth_type == bpf_htons(ETH_P_8021Q)
+	    || eth_type == bpf_htons(ETH_P_8021AD)) {
+		struct _vlan_hdr *vlan_hdr;
+
+		vlan_hdr = (void *)eth + offset;
+		pkt->vlan_inner_offset = offset;
+		pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI)
+				& VLAN_VID_MASK;
+		eth_type        = vlan_hdr->h_vlan_encapsulated_proto;
+		offset += sizeof(*vlan_hdr);
+	}
+
+	pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */
+	pkt->l3_offset = offset; /* first byte after eth + any parsed VLAN hdrs */
+
+	return true;
+}
+
+/* Hint: VLAN IDs are chosen to hit network-byte-order issues */
+#define TESTVLAN 4011 /* 0xFAB */
+// #define TO_VLAN  4000 /* 0xFA0 (hint 0x0A0 = 160) */
+
+SEC("xdp_drop_vlan_4011")
+int  xdp_prognum0(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct parse_pkt pkt = { 0 }; /* zeroed: untagged frames read as VID 0 */
+
+	if (!parse_eth_frame(data, data_end, &pkt))
+		return XDP_ABORTED; /* frame too short for eth + 2 VLAN hdrs */
+
+	/* Drop specific VLAN ID example: outer VID equal to TESTVLAN */
+	if (pkt.vlan_outer == TESTVLAN)
+		return XDP_ABORTED;
+	/*
+	 * Using XDP_ABORTED makes it possible to record this event,
+	 * via tracepoint xdp:xdp_exception like:
+	 *  # perf record -a -e xdp:xdp_exception
+	 *  # perf script
+	 */
+	return XDP_PASS;
+}
+/*
+Commands to set up a VLAN on Linux to test that packets get dropped:
+
+ export ROOTDEV=ixgbe2
+ export VLANID=4011
+ ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID
+ ip link set dev  $ROOTDEV.$VLANID up
+
+ ip link set dev $ROOTDEV mtu 1508
+ ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID
+
+Load prog with ip tool:
+
+ ip link set $ROOTDEV xdp off
+ ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011
+
+*/
+
+/* Changing VLAN to zero has the same practical effect as removing the VLAN. */
+#define TO_VLAN	0
+
+SEC("xdp_vlan_change")
+int  xdp_prognum1(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct parse_pkt pkt = { 0 }; /* zeroed: untagged frames read as VID 0 */
+
+	if (!parse_eth_frame(data, data_end, &pkt))
+		return XDP_ABORTED; /* frame too short for eth + 2 VLAN hdrs */
+
+	/* Rewrite the outer VLAN ID in place when it equals TESTVLAN */
+	if (pkt.vlan_outer == TESTVLAN) {
+		struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset;
+
+		/* Replace the VID with TO_VLAN, preserve top 4 bits of the TCI */
+		vlan_hdr->h_vlan_TCI =
+			bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
+				  | TO_VLAN);
+	}
+
+	return XDP_PASS; /* all parsed frames pass, rewritten or not */
+}
+
+/*
+ * Show XDP+TC can cooperate, on creating a VLAN rewriter.
+ * 1. Create a XDP prog that can "pop"/remove a VLAN header.
+ * 2. Create a TC-bpf prog that egress can add a VLAN header.
+ */
+
+#ifndef ETH_ALEN /* Ethernet MAC address length */
+#define ETH_ALEN	6	/* bytes */
+#endif
+#define VLAN_HDR_SZ	4	/* bytes */
+
+SEC("xdp_vlan_remove_outer")
+int  xdp_prognum2(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct parse_pkt pkt = { 0 }; /* zeroed: offset 0 means "no outer tag" */
+	char *dest;
+
+	if (!parse_eth_frame(data, data_end, &pkt))
+		return XDP_ABORTED; /* frame too short for eth + 2 VLAN hdrs */
+
+	/* Skip packet if no outer VLAN was detected */
+	if (pkt.vlan_outer_offset == 0)
+		return XDP_PASS;
+
+	/* Move the MAC addrs; dest overlaps src, which memmove handles */
+	dest = data;
+	dest+= VLAN_HDR_SZ;
+	/*
+	 * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
+	 * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
+	 */
+	__builtin_memmove(dest, data, ETH_ALEN * 2);
+	/* Note: LLVM built-in memmove inlining requires size to be constant */
+
+	/* Move start of packet header seen by Linux kernel stack */
+	bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); /* NOTE(review): result unchecked */
+
+	return XDP_PASS;
+}
+
+static __always_inline
+void shift_mac_4bytes_16bit(void *data)
+{
+	__u16 *p = data; /* frame viewed as 16-bit words; p[0..5] get moved */
+
+	p[7] = p[5]; /* overwrites p[7], which was vlan_hdr->h_vlan_TCI */
+	p[6] = p[4]; /* overwrites p[6], which was ethhdr->h_proto */
+	p[5] = p[3]; /* highest index first: each source is read before */
+	p[4] = p[2]; /* a later store overwrites it */
+	p[3] = p[1];
+	p[2] = p[0];
+}
+
+static __always_inline
+void shift_mac_4bytes_32bit(void *data)
+{
+	__u32 *p = data; /* frame viewed as 32-bit words; p[0..2] get moved */
+
+	/* Assuming VLAN hdr present. The 4 bytes in p[3] that get
+	 * overwritten are ethhdr->h_proto and vlan_hdr->h_vlan_TCI.
+	 * The vlan_hdr->h_vlan_encapsulated_proto takes over the role of
+	 * ethhdr->h_proto.
+	 */
+	p[3] = p[2]; /* highest index first: sources are read before overwrite */
+	p[2] = p[1];
+	p[1] = p[0];
+}
+
+SEC("xdp_vlan_remove_outer2")
+int  xdp_prognum3(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct ethhdr *orig_eth = data;
+	struct parse_pkt pkt = { 0 }; /* zeroed: offset 0 means "no outer tag" */
+
+	if (!parse_eth_frame(orig_eth, data_end, &pkt))
+		return XDP_ABORTED; /* frame too short for eth + 2 VLAN hdrs */
+
+	/* Skip packet if no outer VLAN was detected */
+	if (pkt.vlan_outer_offset == 0)
+		return XDP_PASS;
+
+	/* Simply shift MAC addrs 4 bytes forward, overwriting h_proto + TCI */
+	shift_mac_4bytes_32bit(data);
+
+	/* Move start of packet header seen by Linux kernel stack */
+	bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); /* NOTE(review): result unchecked */
+
+	return XDP_PASS;
+}
+
+/*=====================================
+ *  BELOW: TC-hook based ebpf programs
+ * ====================================
+ * The TC-clsact eBPF programs (currently) need to be attached via TC commands
+ */
+
+SEC("tc_vlan_push")
+int _tc_progA(struct __sk_buff *ctx)
+{
+	bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN); /* result unchecked */
+
+	return TC_ACT_OK; /* returned unconditionally */
+}
+/*
+Commands to setup TC to use above bpf prog:
+
+export ROOTDEV=ixgbe2
+export FILE=xdp_vlan01_kern.o
+
+# Re-attach clsact to clear/flush existing role
+tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\
+tc qdisc add dev $ROOTDEV clsact
+
+# Attach BPF prog EGRESS
+tc filter add dev $ROOTDEV egress \
+  prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
+
+tc filter show dev $ROOTDEV egress
+*/
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
new file mode 100755
index 000000000000..51a3a31d1aac
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+
+TESTNAME=xdp_vlan
+
+usage() { # Print a short help text; the caller decides whether to exit
+  echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME"
+  echo ""
+  echo "Usage: $0 [-vfh]"
+  echo "  -v | --verbose : Verbose"
+  echo "  --flush        : Flush before starting (e.g. after --interactive)"
+  echo "  --interactive  : Keep netns setup running after test-run"
+  echo ""
+}
+
+cleanup() # Report PASS/FAILED, then tear down veth1/ns1/ns2 unless interactive
+{
+	local status=$? # status of whatever ran just before cleanup was invoked
+
+	if [ "$status" = "0" ]; then
+		echo "selftests: $TESTNAME [PASS]";
+	else
+		echo "selftests: $TESTNAME [FAILED]";
+	fi
+
+	if [ -n "$INTERACTIVE" ]; then # keep the namespaces for manual inspection
+		echo "Namespace setup still active explore with:"
+		echo " ip netns exec ns1 bash"
+		echo " ip netns exec ns2 bash"
+		exit $status
+	fi
+
+	set +e # deletions below are best-effort; errors are discarded
+	ip link del veth1 2> /dev/null
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+}
+
+# Use external "getopt" for --long-options; all options are plain flags
+OPTIONS=$(getopt -o hvfi \
+    --long verbose,flush,help,interactive,debug -- "$@")
+if (( $? != 0 )); then
+    usage
+    echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?"
+    exit 2
+fi
+eval set -- "$OPTIONS"
+
+##  --- Parse command line arguments / parameters ---
+while true; do
+	case "$1" in
+	    -v | --verbose)
+		export VERBOSE=yes
+		shift
+		;;
+	    -i | --interactive | --debug )
+		INTERACTIVE=yes
+		shift
+		;;
+	    -f | --flush )
+		cleanup
+		shift
+		;;
+	    -- )
+		shift
+		break
+		;;
+	    -h | --help )
+		usage;
+		echo "selftests: $TESTNAME [SKIP] usage help info requested"
+		exit 0
+		;;
+	    * )
+		shift
+		break
+		;;
+	esac
+done
+
+if [ "$EUID" -ne 0 ]; then
+	echo "selftests: $TESTNAME [FAILED] need root privileges"
+	exit 1
+fi
+
+ip link set dev lo xdp off 2>/dev/null > /dev/null
+if [ $? -ne 0 ];then
+	echo "selftests: $TESTNAME [SKIP] need ip xdp support"
+	exit 0
+fi
+
+# Interactive mode likely require us to cleanup netns
+if [ -n "$INTERACTIVE" ]; then
+	ip link del veth1 2> /dev/null
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+fi
+
+# Exit on failure
+set -e
+
+# Some shell-tools dependencies
+which ip > /dev/null
+which tc > /dev/null
+which ethtool > /dev/null
+
+# Make rest of shell verbose, showing comments as doc/info
+if [ -n "$VERBOSE" ]; then
+    set -v
+fi
+
+# Create two namespaces
+ip netns add ns1
+ip netns add ns2
+
+# Run cleanup if failing or on kill
+trap cleanup 0 2 3 6 9
+
+# Create veth pair
+ip link add veth1 type veth peer name veth2
+
+# Move veth1 and veth2 into the respective namespaces
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+
+# NOTICE: XDP require VLAN header inside packet payload
+#  - Thus, disable VLAN offloading driver features
+#  - For veth REMEMBER TX side VLAN-offload
+#
+# Disable rx-vlan-offload (mostly needed on ns1)
+ip netns exec ns1 ethtool -K veth1 rxvlan off
+ip netns exec ns2 ethtool -K veth2 rxvlan off
+#
+# Disable tx-vlan-offload (mostly needed on ns2)
+ip netns exec ns2 ethtool -K veth2 txvlan off
+ip netns exec ns1 ethtool -K veth1 txvlan off
+
+export IPADDR1=100.64.41.1
+export IPADDR2=100.64.41.2
+
+# In ns1/veth1 add IP-addr on plain net_device
+ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1
+ip netns exec ns1 ip link set veth1 up
+
+# In ns2/veth2 create VLAN device
+export VLAN=4011
+export DEVNS2=veth2
+ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
+ip netns exec ns2 ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
+ip netns exec ns2 ip link set $DEVNS2 up
+ip netns exec ns2 ip link set $DEVNS2.$VLAN up
+
+# Bring up lo in netns (to avoid confusing people using --interactive)
+ip netns exec ns1 ip link set lo up
+ip netns exec ns2 ip link set lo up
+
+# At this point, the hosts cannot reach each other,
+# because ns2 is using VLAN tags on the packets.
+
+ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Okay ping fails"'
+
+
+# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
+# ----------------------------------------------------------------------
+# In ns1: ingress use XDP to remove VLAN tags
+export DEVNS1=veth1
+export FILE=test_xdp_vlan.o
+
+# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
+export XDP_PROG=xdp_vlan_change
+ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG
+
+# In ns1: egress use TC to add back VLAN tag 4011
+#  (del cmd)
+#  tc qdisc del dev $DEVNS1 clsact 2> /dev/null
+#
+ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact
+ip netns exec ns1 tc filter add dev $DEVNS1 egress \
+  prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
+
+# Now the namespaces can reach each-other, test with ping:
+ip netns exec ns2 ping -W 2 -c 3 $IPADDR1
+ip netns exec ns1 ping -W 2 -c 3 $IPADDR2
+
+# Second test: Replace xdp prog, that fully remove vlan header
+#
+# Catch kernel bug for generic-XDP, that didn't allow us to
+# remove a VLAN header, because skb->protocol still contains the VLAN
+# ETH_P_8021Q indication, and this causes overwriting of our changes.
+#
+export XDP_PROG=xdp_vlan_remove_outer2
+ip netns exec ns1 ip link set $DEVNS1 xdp off
+ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG
+
+# Now the namespaces should still be able to reach each other, test with ping:
+ip netns exec ns2 ping -W 2 -c 3 $IPADDR1
+ip netns exec ns1 ping -W 2 -c 3 $IPADDR2