44 files changed, 1828 insertions, 222 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index a5edea07b8d5..41e2feb0cf4f 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -25,8 +25,6 @@
 
 #include "bpf_jit_32.h"
 
-int bpf_jit_enable __read_mostly;
-
 /*
  * eBPF prog stack layout:
  *
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index cbfe890b0ee5..0775d5ab8ee9 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -31,8 +31,6 @@
 
 #include "bpf_jit.h"
 
-int bpf_jit_enable __read_mostly;
-
 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 44b925005dd3..4d8cb9bb8365 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1207,8 +1207,6 @@ jmp_cmp:
 	return 0;
 }
 
-int bpf_jit_enable __read_mostly;
-
 void bpf_jit_compile(struct bpf_prog *fp)
 {
 	struct jit_ctx ctx;
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 97069a1b6f43..4e347030ed2c 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -177,8 +177,6 @@ static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
 		(ctx->idx * 4) - 4;
 }
 
-int bpf_jit_enable __read_mostly;
-
 enum which_ebpf_reg {
 	src_reg,
 	src_reg_no_fp,
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index f9941b3b5770..872d1f6dd11e 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -18,8 +18,6 @@
 
 #include "bpf_jit32.h"
 
-int bpf_jit_enable __read_mostly;
-
 static inline void bpf_flush_icache(void *start, void *end)
 {
 	smp_wmb();
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 6771c63b2bec..217a78e84865 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -21,8 +21,6 @@
 
 #include "bpf_jit64.h"
 
-int bpf_jit_enable __read_mostly;
-
 static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
 {
 	memset32(area, BREAKPOINT_INSTRUCTION, size/4);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 1dfadbd126f3..e50188773ff3 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -28,8 +28,6 @@
 #include <asm/set_memory.h>
 #include "bpf_jit.h"
 
-int bpf_jit_enable __read_mostly;
-
 struct bpf_jit {
 	u32 seen;		/* Flags to remember seen eBPF instructions */
 	u32 seen_reg[16];	/* Array to remember which registers are used */
diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c
index 09e318eb34ee..3bd8ca95e521 100644
--- a/arch/sparc/net/bpf_jit_comp_32.c
+++ b/arch/sparc/net/bpf_jit_comp_32.c
@@ -11,8 +11,6 @@
 
 #include "bpf_jit_32.h"
 
-int bpf_jit_enable __read_mostly;
-
 static inline bool is_simm13(unsigned int value)
 {
 	return value + 0x1000 < 0x2000;
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 635fdefd4ae2..50a24d7bd4c5 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -12,8 +12,6 @@
 
 #include "bpf_jit_64.h"
 
-int bpf_jit_enable __read_mostly;
-
 static inline bool is_simm13(unsigned int value)
 {
 	return value + 0x1000 < 0x2000;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 87f214fbe66e..5acee5139e28 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -15,8 +15,6 @@
 #include <asm/set_memory.h>
 #include <linux/bpf.h>
 
-int bpf_jit_enable __read_mostly;
-
 /*
  * assembly code in arch/x86/net/bpf_jit.S
  */
@@ -154,6 +152,11 @@ static bool is_ereg(u32 reg)
 			     BIT(BPF_REG_AX));
 }
 
+static bool is_axreg(u32 reg)
+{
+	return reg == BPF_REG_0;	/* the only BPF reg treated as eax/rax */
+}
+
 /* add modifiers if 'reg' maps to x64 registers r8..r15 */
 static u8 add_1mod(u8 byte, u32 reg)
 {
@@ -447,16 +450,36 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			else if (is_ereg(dst_reg))
 				EMIT1(add_1mod(0x40, dst_reg));
 
+			/* b3 holds 'normal' opcode, b2 short form only valid
+			 * in case dst is eax/rax.
+			 */
 			switch (BPF_OP(insn->code)) {
-			case BPF_ADD: b3 = 0xC0; break;
-			case BPF_SUB: b3 = 0xE8; break;
-			case BPF_AND: b3 = 0xE0; break;
-			case BPF_OR: b3 = 0xC8; break;
-			case BPF_XOR: b3 = 0xF0; break;
+			case BPF_ADD:
+				b3 = 0xC0;
+				b2 = 0x05;
+				break;
+			case BPF_SUB:
+				b3 = 0xE8;
+				b2 = 0x2D;
+				break;
+			case BPF_AND:
+				b3 = 0xE0;
+				b2 = 0x25;
+				break;
+			case BPF_OR:
+				b3 = 0xC8;
+				b2 = 0x0D;
+				break;
+			case BPF_XOR:
+				b3 = 0xF0;
+				b2 = 0x35;
+				break;
 			}
 
 			if (is_imm8(imm32))
 				EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
+			else if (is_axreg(dst_reg))
+				EMIT1_off32(b2, imm32);
 			else
 				EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
 			break;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index 71e6586acc36..80d3aa0fc9d3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -157,7 +157,14 @@ nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type,
 			int tag)
 {
 	struct sk_buff *skb;
-	int err;
+	int i, err;
+
+	for (i = 0; i < 50; i++) {
+		udelay(4);
+		skb = nfp_bpf_reply(bpf, tag);
+		if (skb)
+			return skb;
+	}
 
 	err = wait_event_interruptible_timeout(bpf->cmsg_wq,
 					       skb = nfp_bpf_reply(bpf, tag),
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index e2859b2e9c6a..1a357aacc444 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -127,6 +127,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
 	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
 	unsigned int stack_size;
 	unsigned int max_instr;
+	int err;
 
 	stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64;
 	if (prog->aux->stack_depth > stack_size) {
@@ -143,7 +144,14 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
 	if (!nfp_prog->prog)
 		return -ENOMEM;
 
-	return nfp_bpf_jit(nfp_prog);
+	err = nfp_bpf_jit(nfp_prog);
+	if (err)
+		return err;
+
+	prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64);
+	prog->aux->offload->jited_image = nfp_prog->prog;
+
+	return 0;
 }
 
 static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
@@ -168,6 +176,8 @@ nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap,
 static int
 nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
 {
+	if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY)
+		return -EINVAL;
 	return nfp_bpf_ctrl_del_entry(offmap, key);
 }
 
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 5134d5c1306c..b3851bbefad3 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -17,6 +17,7 @@
 #include <linux/bpf_verifier.h>
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
+#include <linux/mutex.h>
 #include <linux/rtnetlink.h>
 #include <net/pkt_cls.h>
 
@@ -31,6 +32,19 @@ struct nsim_bpf_bound_prog {
 	struct list_head l;
 };
 
+#define NSIM_BPF_MAX_KEYS		2	/* number of entry[] slots */
+
+struct nsim_bpf_bound_map {
+	struct netdevsim *ns;		/* netdevsim the map was allocated on */
+	struct bpf_offloaded_map *map;	/* back-pointer to the offloaded map */
+	struct mutex mutex;		/* held by map ops around entry[] */
+	struct nsim_map_entry {
+		void *key;		/* NULL while the slot is unused */
+		void *value;
+	} entry[NSIM_BPF_MAX_KEYS];
+	struct list_head l;		/* node on ns->bpf_bound_maps */
+};
+
 static int nsim_debugfs_bpf_string_read(struct seq_file *file, void *data)
 {
 	const char **str = file->private;
@@ -284,6 +298,224 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
 	return 0;
 }
 
+static bool
+nsim_map_key_match(struct bpf_map *map, struct nsim_map_entry *e, void *key)
+{
+	return e->key && memcmp(key, e->key, map->key_size) == 0;
+}
+
+static int nsim_map_key_find(struct bpf_offloaded_map *offmap, void *key)
+{
+	struct nsim_bpf_bound_map *priv = offmap->dev_priv;
+	struct bpf_map *map = &offmap->map;
+	unsigned int slot;
+
+	for (slot = 0; slot < ARRAY_SIZE(priv->entry); slot++)
+		if (nsim_map_key_match(map, &priv->entry[slot], key))
+			return slot;
+	return -ENOENT;		/* no populated slot holds this key */
+}
+
+static int
+nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	struct nsim_map_entry *slot = &nmap->entry[idx];
+
+	slot->key = kmalloc(offmap->map.key_size, GFP_USER);
+	if (!slot->key)
+		return -ENOMEM;
+	slot->value = kmalloc(offmap->map.value_size, GFP_USER);
+	if (slot->value)
+		return 0;
+	/* Value allocation failed: drop the key so the slot reads as empty. */
+	kfree(slot->key);
+	slot->key = NULL;
+	return -ENOMEM;
+}
+
+static int
+nsim_map_get_next_key(struct bpf_offloaded_map *offmap,
+		      void *key, void *next_key)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	int idx = -ENOENT;
+
+	mutex_lock(&nmap->mutex);
+	/* Missing/unknown key: start at slot 0, else at the one after it */
+	if (key)
+		idx = nsim_map_key_find(offmap, key);
+	if (idx == -ENOENT)
+		idx = 0;
+	else
+		idx++;
+	/* Copy out the first populated slot at or after idx. */
+	for (; idx < ARRAY_SIZE(nmap->entry); idx++) {
+		if (nmap->entry[idx].key) {
+			memcpy(next_key, nmap->entry[idx].key,
+			       offmap->map.key_size);
+			break;
+		}
+	}
+
+	mutex_unlock(&nmap->mutex);
+	/* idx ran off the end: no further key to report. */
+	if (idx == ARRAY_SIZE(nmap->entry))
+		return -ENOENT;
+	return 0;
+}
+
+static int
+nsim_map_lookup_elem(struct bpf_offloaded_map *offmap, void *key, void *value)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	int slot, ret = 0;
+
+	/* Find and copy under the mutex the update/delete ops also take. */
+	mutex_lock(&nmap->mutex);
+	slot = nsim_map_key_find(offmap, key);
+	if (slot < 0)
+		ret = slot;
+	else
+		memcpy(value, nmap->entry[slot].value, offmap->map.value_size);
+	mutex_unlock(&nmap->mutex);
+	return ret;
+}
+
+static int
+nsim_map_update_elem(struct bpf_offloaded_map *offmap,
+		     void *key, void *value, u64 flags)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	int idx, err = 0;
+
+	mutex_lock(&nmap->mutex);
+	/* Honour BPF_EXIST / BPF_NOEXIST against the current key state. */
+	idx = nsim_map_key_find(offmap, key);
+	if (idx < 0 && flags == BPF_EXIST) {
+		err = idx;
+		goto exit_unlock;
+	}
+	if (idx >= 0 && flags == BPF_NOEXIST) {
+		err = -EEXIST;
+		goto exit_unlock;
+	}
+	/* New key: claim the first free slot, or fail if none is left. */
+	if (idx < 0) {
+		for (idx = 0; idx < ARRAY_SIZE(nmap->entry); idx++)
+			if (!nmap->entry[idx].key)
+				break;
+		if (idx == ARRAY_SIZE(nmap->entry)) {
+			err = -E2BIG;
+			goto exit_unlock;
+		}
+
+		err = nsim_map_alloc_elem(offmap, idx);
+		if (err)
+			goto exit_unlock;
+	}
+	/* Slot idx has buffers at this point; (over)write key and value. */
+	memcpy(nmap->entry[idx].key, key, offmap->map.key_size);
+	memcpy(nmap->entry[idx].value, value, offmap->map.value_size);
+exit_unlock:
+	mutex_unlock(&nmap->mutex);
+
+	return err;
+}
+
+static int nsim_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	struct nsim_map_entry *slot;
+	int pos;
+
+	/* Deleting from an array-type map is rejected outright. */
+	if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY)
+		return -EINVAL;
+
+	mutex_lock(&nmap->mutex);
+	pos = nsim_map_key_find(offmap, key);
+	if (pos >= 0) {
+		slot = &nmap->entry[pos];
+		kfree(slot->key);
+		kfree(slot->value);
+		memset(slot, 0, sizeof(*slot));
+	}
+	mutex_unlock(&nmap->mutex);
+	return pos < 0 ? pos : 0;
+}
+
+static const struct bpf_map_dev_ops nsim_bpf_map_ops = {
+	.map_get_next_key	= nsim_map_get_next_key,
+	.map_lookup_elem	= nsim_map_lookup_elem,
+	.map_update_elem	= nsim_map_update_elem,
+	.map_delete_elem	= nsim_map_delete_elem,
+};
+
+static int
+nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap)
+{
+	struct nsim_bpf_bound_map *nmap;
+	unsigned int i;
+	int err;
+	/* Only flag-less array/hash maps of up to NSIM_BPF_MAX_KEYS entries. */
+	if (WARN_ON(offmap->map.map_type != BPF_MAP_TYPE_ARRAY &&
+		    offmap->map.map_type != BPF_MAP_TYPE_HASH))
+		return -EINVAL;
+	if (offmap->map.max_entries > NSIM_BPF_MAX_KEYS)
+		return -ENOMEM;
+	if (offmap->map.map_flags)
+		return -EINVAL;
+
+	nmap = kzalloc(sizeof(*nmap), GFP_USER);
+	if (!nmap)
+		return -ENOMEM;
+
+	offmap->dev_priv = nmap;
+	nmap->ns = ns;
+	nmap->map = offmap;
+	mutex_init(&nmap->mutex);
+	/* Array maps start fully populated: slot i is created with key i. */
+	if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) {
+		for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) {
+			u32 *key;
+
+			err = nsim_map_alloc_elem(offmap, i);
+			if (err)
+				goto err_free;
+			key = nmap->entry[i].key;
+			*key = i;
+		}
+	}
+
+	offmap->dev_ops = &nsim_bpf_map_ops;
+	list_add_tail(&nmap->l, &ns->bpf_bound_maps);
+
+	return 0;
+
+err_free:
+	while (i--) {		/* free slots [0, i); slot i cleaned itself up */
+		kfree(nmap->entry[i].key);
+		kfree(nmap->entry[i].value);
+	}
+	kfree(nmap);
+	return err;
+}
+
+static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	struct nsim_map_entry *slot = nmap->entry;
+
+	for (; slot < nmap->entry + ARRAY_SIZE(nmap->entry); slot++) {
+		kfree(slot->key);	/* unused slots hold NULL pointers */
+		kfree(slot->value);
+	}
+	list_del_init(&nmap->l);
+	mutex_destroy(&nmap->mutex);
+	kfree(nmap);
+}
+
 int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 {
 	struct netdevsim *ns = netdev_priv(dev);
@@ -328,6 +560,14 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 			return err;
 
 		return nsim_xdp_set_prog(ns, bpf);
+	case BPF_OFFLOAD_MAP_ALLOC:
+		if (!ns->bpf_map_accept)
+			return -EOPNOTSUPP;
+
+		return nsim_bpf_map_alloc(ns, bpf->offmap);
+	case BPF_OFFLOAD_MAP_FREE:
+		nsim_bpf_map_free(bpf->offmap);
+		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -336,6 +576,7 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 int nsim_bpf_init(struct netdevsim *ns)
 {
 	INIT_LIST_HEAD(&ns->bpf_bound_progs);
+	INIT_LIST_HEAD(&ns->bpf_bound_maps);
 
 	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
 			   &ns->bpf_offloaded_id);
@@ -362,12 +603,17 @@ int nsim_bpf_init(struct netdevsim *ns)
 	debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir,
 			    &ns->bpf_xdpoffload_accept);
 
+	ns->bpf_map_accept = true;
+	debugfs_create_bool("bpf_map_accept", 0600, ns->ddir,
+			    &ns->bpf_map_accept);
+
 	return 0;
 }
 
 void nsim_bpf_uninit(struct netdevsim *ns)
 {
 	WARN_ON(!list_empty(&ns->bpf_bound_progs));
+	WARN_ON(!list_empty(&ns->bpf_bound_maps));
 	WARN_ON(ns->xdp_prog);
 	WARN_ON(ns->bpf_offloaded);
 }
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 32270de9395a..b80361200302 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -61,6 +61,9 @@ struct netdevsim {
 	bool bpf_tc_non_bound_accept;
 	bool bpf_xdpdrv_accept;
 	bool bpf_xdpoffload_accept;
+
+	bool bpf_map_accept;
+	struct list_head bpf_bound_maps;
 };
 
 extern struct dentry *nsim_ddir;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5c2c104dc2c5..66df387106de 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -234,6 +234,8 @@ struct bpf_prog_offload {
 	struct list_head	offloads;
 	bool			dev_state;
 	const struct bpf_prog_offload_ops *dev_ops;
+	void			*jited_image;
+	u32			jited_len;
 };
 
 struct bpf_prog_aux {
@@ -584,6 +586,8 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog);
 int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
 			       struct bpf_prog *prog);
 
+int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map);
+
 int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value);
 int bpf_map_offload_update_elem(struct bpf_map *map,
 				void *key, void *value, u64 flags);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7c2259e8bc54..406c19d6016b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -17,7 +17,7 @@
 #define BPF_ALU64	0x07	/* alu mode in double word width */
 
 /* ld/ldx fields */
-#define BPF_DW		0x18	/* double word */
+#define BPF_DW		0x18	/* double word (64-bit) */
 #define BPF_XADD	0xc0	/* exclusive add */
 
 /* alu/jmp fields */
@@ -938,6 +938,9 @@ struct bpf_map_info {
 	__u32 max_entries;
 	__u32 map_flags;
 	char  name[BPF_OBJ_NAME_LEN];
+	__u32 ifindex;
+	__u64 netns_dev;
+	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
 /* User bpf_sock_ops struct to access socket values and specify request ops
diff --git a/include/uapi/linux/bpf_common.h b/include/uapi/linux/bpf_common.h
index 18be90725ab0..ee97668bdadb 100644
--- a/include/uapi/linux/bpf_common.h
+++ b/include/uapi/linux/bpf_common.h
@@ -15,9 +15,10 @@
 
 /* ld/ldx fields */
 #define BPF_SIZE(code)  ((code) & 0x18)
-#define		BPF_W		0x00
-#define		BPF_H		0x08
-#define		BPF_B		0x10
+#define		BPF_W		0x00 /* 32-bit */
+#define		BPF_H		0x08 /* 16-bit */
+#define		BPF_B		0x10 /*  8-bit */
+/* eBPF		BPF_DW		0x18    64-bit */
 #define BPF_MODE(code)  ((code) & 0xe0)
 #define		BPF_IMM		0x00
 #define		BPF_ABS		0x20
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index ab94d304a634..b1f66480135b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -49,27 +49,35 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
 }
 
 /* Called from syscall */
-static struct bpf_map *array_map_alloc(union bpf_attr *attr)
+static int array_map_alloc_check(union bpf_attr *attr)
 {
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	int numa_node = bpf_map_attr_numa_node(attr);
-	u32 elem_size, index_mask, max_entries;
-	bool unpriv = !capable(CAP_SYS_ADMIN);
-	struct bpf_array *array;
-	u64 array_size, mask64;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    attr->value_size == 0 ||
 	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
 	    (percpu && numa_node != NUMA_NO_NODE))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (attr->value_size > KMALLOC_MAX_SIZE)
 		/* if value_size is bigger, the user space won't be able to
 		 * access the elements.
 		 */
-		return ERR_PTR(-E2BIG);
+		return -E2BIG;
+
+	return 0;
+}
+
+static struct bpf_map *array_map_alloc(union bpf_attr *attr)
+{
+	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
+	int numa_node = bpf_map_attr_numa_node(attr);
+	u32 elem_size, index_mask, max_entries;
+	bool unpriv = !capable(CAP_SYS_ADMIN);
+	struct bpf_array *array;
+	u64 array_size, mask64;
 
 	elem_size = round_up(attr->value_size, 8);
 
@@ -112,12 +120,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array->map.unpriv_array = unpriv;
 
 	/* copy mandatory map attributes */
-	array->map.map_type = attr->map_type;
-	array->map.key_size = attr->key_size;
-	array->map.value_size = attr->value_size;
-	array->map.max_entries = attr->max_entries;
-	array->map.map_flags = attr->map_flags;
-	array->map.numa_node = numa_node;
+	bpf_map_init_from_attr(&array->map, attr);
 	array->elem_size = elem_size;
 
 	if (!percpu)
@@ -327,6 +330,7 @@ static void array_map_free(struct bpf_map *map)
 }
 
 const struct bpf_map_ops array_map_ops = {
+	.map_alloc_check = array_map_alloc_check,
 	.map_alloc = array_map_alloc,
 	.map_free = array_map_free,
 	.map_get_next_key = array_map_get_next_key,
@@ -337,6 +341,7 @@ const struct bpf_map_ops array_map_ops = {
 };
 
 const struct bpf_map_ops percpu_array_map_ops = {
+	.map_alloc_check = array_map_alloc_check,
 	.map_alloc = array_map_alloc,
 	.map_free = array_map_free,
 	.map_get_next_key = array_map_get_next_key,
@@ -345,12 +350,12 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_delete_elem = array_map_delete_elem,
 };
 
-static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
+static int fd_array_map_alloc_check(union bpf_attr *attr)
 {
 	/* only file descriptors can be stored in this type of map */
 	if (attr->value_size != sizeof(u32))
-		return ERR_PTR(-EINVAL);
-	return array_map_alloc(attr);
+		return -EINVAL;
+	return array_map_alloc_check(attr);
 }
 
 static void fd_array_map_free(struct bpf_map *map)
@@ -474,7 +479,8 @@ void bpf_fd_array_map_clear(struct bpf_map *map)
 }
 
 const struct bpf_map_ops prog_array_map_ops = {
-	.map_alloc = fd_array_map_alloc,
+	.map_alloc_check = fd_array_map_alloc_check,
+	.map_alloc = array_map_alloc,
 	.map_free = fd_array_map_free,
 	.map_get_next_key = array_map_get_next_key,
 	.map_lookup_elem = fd_array_map_lookup_elem,
@@ -561,7 +567,8 @@ static void perf_event_fd_array_release(struct bpf_map *map,
 }
 
 const struct bpf_map_ops perf_event_array_map_ops = {
-	.map_alloc = fd_array_map_alloc,
+	.map_alloc_check = fd_array_map_alloc_check,
+	.map_alloc = array_map_alloc,
 	.map_free = fd_array_map_free,
 	.map_get_next_key = array_map_get_next_key,
 	.map_lookup_elem = fd_array_map_lookup_elem,
@@ -592,7 +599,8 @@ static void cgroup_fd_array_free(struct bpf_map *map)
 }
 
 const struct bpf_map_ops cgroup_array_map_ops = {
-	.map_alloc = fd_array_map_alloc,
+	.map_alloc_check = fd_array_map_alloc_check,
+	.map_alloc = array_map_alloc,
 	.map_free = cgroup_fd_array_free,
 	.map_get_next_key = array_map_get_next_key,
 	.map_lookup_elem = fd_array_map_lookup_elem,
@@ -610,7 +618,7 @@ static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
 	if (IS_ERR(inner_map_meta))
 		return inner_map_meta;
 
-	map = fd_array_map_alloc(attr);
+	map = array_map_alloc(attr);
 	if (IS_ERR(map)) {
 		bpf_map_meta_free(inner_map_meta);
 		return map;
@@ -673,6 +681,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
 }
 
 const struct bpf_map_ops array_of_maps_map_ops = {
+	.map_alloc_check = fd_array_map_alloc_check,
 	.map_alloc = array_of_map_alloc,
 	.map_free = array_of_map_free,
 	.map_get_next_key = array_map_get_next_key,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 887e28408f98..3aa0658add76 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -300,6 +300,11 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 }
 
 #ifdef CONFIG_BPF_JIT
+/* All BPF JIT sysctl knobs here. */
+int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
+int bpf_jit_harden   __read_mostly;
+int bpf_jit_kallsyms __read_mostly;
+
 static __always_inline void
 bpf_get_prog_addr_region(const struct bpf_prog *prog,
 			 unsigned long *symbol_start,
@@ -381,8 +386,6 @@ static DEFINE_SPINLOCK(bpf_lock);
 static LIST_HEAD(bpf_kallsyms);
 static struct latch_tree_root bpf_tree __cacheline_aligned;
 
-int bpf_jit_kallsyms __read_mostly;
-
 static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
 {
 	WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
@@ -563,8 +566,6 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
 	bpf_prog_unlock_free(fp);
 }
 
-int bpf_jit_harden __read_mostly;
-
 static int bpf_jit_blind_insn(const struct bpf_insn *from,
 			      const struct bpf_insn *aux,
 			      struct bpf_insn *to_buff)
@@ -1379,9 +1380,13 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
 }
 
 #else
-static unsigned int __bpf_prog_ret0(const void *ctx,
-				    const struct bpf_insn *insn)
+static unsigned int __bpf_prog_ret0_warn(const void *ctx,
+					 const struct bpf_insn *insn)
 {
+	/* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
+	 * is not working properly, so warn about it!
+	 */
+	WARN_ON_ONCE(1);
 	return 0;
 }
 #endif
@@ -1441,7 +1446,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 
 	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
 #else
-	fp->bpf_func = __bpf_prog_ret0;
+	fp->bpf_func = __bpf_prog_ret0_warn;
 #endif
 
 	/* eBPF JITs can rewrite the program in case constant
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 584e02227671..d7ea96218516 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -591,9 +591,100 @@ unlock:
 	raw_spin_unlock(&trie->lock);
 }
 
-static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key)
+static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
 {
-	return -ENOTSUPP;
+	struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+	struct bpf_lpm_trie_key *key = _key, *next_key = _next_key;
+	struct lpm_trie_node *node, *next_node = NULL, *parent;
+	struct lpm_trie_node **node_stack = NULL;
+	struct lpm_trie_node __rcu **root;
+	int err = 0, stack_ptr = -1;
+	unsigned int next_bit;
+	size_t matchlen;
+
+	/* The get_next_key follows postorder. For the 4 node example in
+	 * the top of this file, the trie_get_next_key() returns the following
+	 * one after another:
+	 *   192.168.0.0/24
+	 *   192.168.1.0/24
+	 *   192.168.128.0/24
+	 *   192.168.0.0/16
+	 *
+	 * The idea is to return more specific keys before less specific ones.
+	 */
+
+	/* Empty trie */
+	if (!rcu_dereference(trie->root))
+		return -ENOENT;
+
+	/* For invalid key, find the leftmost node in the trie */
+	if (!key || key->prefixlen > trie->max_prefixlen) {
+		root = &trie->root;
+		goto find_leftmost;
+	}
+	/* One slot per possible prefixlen, 0..max_prefixlen inclusive. */
+	node_stack = kmalloc_array(trie->max_prefixlen + 1, sizeof(*node_stack),
+				   GFP_USER | __GFP_NOWARN);
+	if (!node_stack)
+		return -ENOMEM;
+
+	/* Try to find the exact node for the given key */
+	for (node = rcu_dereference(trie->root); node;) {
+		node_stack[++stack_ptr] = node;
+		matchlen = longest_prefix_match(trie, node, key);
+		if (node->prefixlen != matchlen ||
+		    node->prefixlen == key->prefixlen)
+			break;
+
+		next_bit = extract_bit(key->data, node->prefixlen);
+		node = rcu_dereference(node->child[next_bit]);
+	}
+	if (!node || node->prefixlen != key->prefixlen ||
+	    (node->flags & LPM_TREE_NODE_FLAG_IM)) {
+		root = &trie->root;
+		goto find_leftmost;
+	}
+
+	/* The node with the exactly-matching key has been found,
+	 * find the first node in postorder after the matched node.
+	 */
+	node = node_stack[stack_ptr];
+	while (stack_ptr > 0) {
+		parent = node_stack[stack_ptr - 1];
+		if (rcu_dereference(parent->child[0]) == node &&
+		    rcu_dereference(parent->child[1])) {
+			root = &parent->child[1];
+			goto find_leftmost;
+		}
+		if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) {
+			next_node = parent;
+			goto do_copy;
+		}
+
+		node = parent;
+		stack_ptr--;
+	}
+
+	/* did not find anything */
+	err = -ENOENT;
+	goto free_stack;
+
+find_leftmost:
+	/* Find the leftmost non-intermediate node, all intermediate nodes
+	 * have exactly two children, so this function will never return NULL.
+	 */
+	for (node = rcu_dereference(*root); node;) {
+		if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
+			next_node = node;
+		node = rcu_dereference(node->child[0]);
+	}
+do_copy:
+	next_key->prefixlen = next_node->prefixlen;
+	memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data),
+	       next_node->data, trie->data_size);
+free_stack:
+	kfree(node_stack);
+	return err;
 }
 
 const struct bpf_map_ops trie_map_ops = {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index a88cebf368bf..c9401075b58c 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -230,9 +230,12 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
 		.prog	= prog,
 		.info	= info,
 	};
+	struct bpf_prog_aux *aux = prog->aux;
 	struct inode *ns_inode;
 	struct path ns_path;
+	char __user *uinsns;
 	void *res;
+	u32 ulen;
 
 	res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args);
 	if (IS_ERR(res)) {
@@ -241,6 +244,26 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
 		return PTR_ERR(res);
 	}
 
+	down_read(&bpf_devs_lock);
+
+	if (!aux->offload) {
+		up_read(&bpf_devs_lock);
+		return -ENODEV;
+	}
+	/* Copy the image only if it is non-empty and a buffer was supplied. */
+	ulen = info->jited_prog_len;
+	info->jited_prog_len = aux->offload->jited_len;
+	if (info->jited_prog_len && ulen) {
+		uinsns = u64_to_user_ptr(info->jited_prog_insns);
+		ulen = min_t(u32, info->jited_prog_len, ulen);
+		if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) {
+			up_read(&bpf_devs_lock);
+			return -EFAULT;
+		}
+	}
+
+	up_read(&bpf_devs_lock);
+
 	ns_inode = ns_path.dentry->d_inode;
 	info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
 	info->netns_ino = ns_inode->i_ino;
@@ -276,7 +299,8 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 
 	if (!capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
-	if (attr->map_type != BPF_MAP_TYPE_HASH)
+	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
+	    attr->map_type != BPF_MAP_TYPE_HASH)
 		return ERR_PTR(-EINVAL);
 
 	offmap = kzalloc(sizeof(*offmap), GFP_USER);
@@ -389,6 +413,61 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return ret;
 }
 
+struct ns_get_path_bpf_map_args {
+	struct bpf_offloaded_map *offmap;	/* in: map being queried */
+	struct bpf_map_info *info;		/* out: ifindex is stored here */
+};
+
+static struct ns_common *bpf_map_offload_info_fill_ns(void *private_data)
+{
+	struct ns_get_path_bpf_map_args *args = private_data;
+	struct ns_common *ns;
+	struct net *net;
+
+	rtnl_lock();
+	down_read(&bpf_devs_lock);
+	/* With a netdev: record ifindex, grab its netns. Without: 0 and NULL. */
+	if (args->offmap->netdev) {
+		args->info->ifindex = args->offmap->netdev->ifindex;
+		net = dev_net(args->offmap->netdev);
+		get_net(net);
+		ns = &net->ns;
+	} else {
+		args->info->ifindex = 0;
+		ns = NULL;
+	}
+
+	up_read(&bpf_devs_lock);
+	rtnl_unlock();
+
+	return ns;
+}
+
+int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
+{
+	struct ns_get_path_bpf_map_args args = {
+		.offmap	= map_to_offmap(map),
+		.info	= info,
+	};
+	struct inode *ns_inode;
+	struct path ns_path;
+	void *res;
+
+	res = ns_get_path_cb(&ns_path, bpf_map_offload_info_fill_ns, &args);
+	if (IS_ERR(res)) {
+		if (!info->ifindex)
+			return -ENODEV;	/* the callback saw no netdev */
+		return PTR_ERR(res);
+	}
+	/* Identify the netns by the device and inode number of its path. */
+	ns_inode = ns_path.dentry->d_inode;
+	info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
+	info->netns_ino = ns_inode->i_ino;
+	path_put(&ns_path);
+
+	return 0;
+}
+
 bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
 {
 	struct bpf_offloaded_map *offmap;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c691b9e972e3..5bdb0cc84ad2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1504,6 +1504,8 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
 	struct bpf_prog *prog;
 	int ret = -ENOTSUPP;
 
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
 	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
 		return -EINVAL;
 
@@ -1724,19 +1726,6 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 		goto done;
 	}
 
-	ulen = info.jited_prog_len;
-	info.jited_prog_len = prog->jited_len;
-	if (info.jited_prog_len && ulen) {
-		if (bpf_dump_raw_ok()) {
-			uinsns = u64_to_user_ptr(info.jited_prog_insns);
-			ulen = min_t(u32, info.jited_prog_len, ulen);
-			if (copy_to_user(uinsns, prog->bpf_func, ulen))
-				return -EFAULT;
-		} else {
-			info.jited_prog_insns = 0;
-		}
-	}
-
 	ulen = info.xlated_prog_len;
 	info.xlated_prog_len = bpf_prog_insn_size(prog);
 	if (info.xlated_prog_len && ulen) {
@@ -1762,6 +1751,24 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 		err = bpf_prog_offload_info_fill(&info, prog);
 		if (err)
 			return err;
+		goto done;
+	}
+
+	/* NOTE: the following code is supposed to be skipped for offload.
+	 * bpf_prog_offload_info_fill() is the place to fill similar fields
+	 * for offload.
+	 */
+	ulen = info.jited_prog_len;
+	info.jited_prog_len = prog->jited_len;
+	if (info.jited_prog_len && ulen) {
+		if (bpf_dump_raw_ok()) {
+			uinsns = u64_to_user_ptr(info.jited_prog_insns);
+			ulen = min_t(u32, info.jited_prog_len, ulen);
+			if (copy_to_user(uinsns, prog->bpf_func, ulen))
+				return -EFAULT;
+		} else {
+			info.jited_prog_insns = 0;
+		}
 	}
 
 done:
@@ -1794,6 +1801,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
 	info.map_flags = map->map_flags;
 	memcpy(info.name, map->name, sizeof(map->name));
 
+	if (bpf_map_is_dev_bound(map)) {
+		err = bpf_map_offload_info_fill(&info, map);
+		if (err)
+			return err;
+	}
+
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
 		return -EFAULT;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8063dffd91a8..dfb138b46488 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1850,6 +1850,19 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	}
 }
 
+static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
+{
+	return type == ARG_PTR_TO_UNINIT_MEM ||
+	       type == ARG_PTR_TO_MEM_OR_NULL ||
+	       type == ARG_PTR_TO_MEM;
+}
+
+static bool arg_type_is_mem_size(enum bpf_arg_type type)
+{
+	return type == ARG_CONST_SIZE_OR_ZERO ||
+	       type == ARG_CONST_SIZE;
+}
+
 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			  enum bpf_arg_type arg_type,
 			  struct bpf_call_arg_meta *meta)
@@ -1899,9 +1912,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		expected_type = PTR_TO_CTX;
 		if (type != expected_type)
 			goto err_type;
-	} else if (arg_type == ARG_PTR_TO_MEM ||
-		   arg_type == ARG_PTR_TO_MEM_OR_NULL ||
-		   arg_type == ARG_PTR_TO_UNINIT_MEM) {
+	} else if (arg_type_is_mem_ptr(arg_type)) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
 		 * passed in as argument, it's a SCALAR_VALUE type. Final test
@@ -1962,25 +1973,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			err = check_stack_boundary(env, regno,
 						   meta->map_ptr->value_size,
 						   false, NULL);
-	} else if (arg_type == ARG_CONST_SIZE ||
-		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
+	} else if (arg_type_is_mem_size(arg_type)) {
 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
-		/* bpf_xxx(..., buf, len) call will access 'len' bytes
-		 * from stack pointer 'buf'. Check it
-		 * note: regno == len, regno - 1 == buf
-		 */
-		if (regno == 0) {
-			/* kernel subsystem misconfigured verifier */
-			verbose(env,
-				"ARG_CONST_SIZE cannot be first argument\n");
-			return -EACCES;
-		}
-
 		/* The register is SCALAR_VALUE; the access check
 		 * happens using its boundaries.
 		 */
-
 		if (!tnum_is_const(reg->var_off))
 			/* For unprivileged variable accesses, disable raw
 			 * mode so that the program is required to
@@ -2124,7 +2122,7 @@ error:
 	return -EINVAL;
 }
 
-static int check_raw_mode(const struct bpf_func_proto *fn)
+static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
 {
 	int count = 0;
 
@@ -2139,7 +2137,44 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
 	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
 		count++;
 
-	return count > 1 ? -EINVAL : 0;
+	/* We only support one arg being in raw mode at the moment,
+	 * which is sufficient for the helper functions we have
+	 * right now.
+	 */
+	return count <= 1;
+}
+
+static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
+				    enum bpf_arg_type arg_next)
+{
+	return (arg_type_is_mem_ptr(arg_curr) &&
+	        !arg_type_is_mem_size(arg_next)) ||
+	       (!arg_type_is_mem_ptr(arg_curr) &&
+		arg_type_is_mem_size(arg_next));
+}
+
+static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
+{
+	/* bpf_xxx(..., buf, len) call will access 'len'
+	 * bytes from memory 'buf'. Both arg types need
+	 * to be paired, so make sure there's no buggy
+	 * helper function specification.
+	 */
+	if (arg_type_is_mem_size(fn->arg1_type) ||
+	    arg_type_is_mem_ptr(fn->arg5_type)  ||
+	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
+	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
+	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
+	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
+		return false;
+
+	return true;
+}
+
+static int check_func_proto(const struct bpf_func_proto *fn)
+{
+	return check_raw_mode_ok(fn) &&
+	       check_arg_pair_ok(fn) ? 0 : -EINVAL;
 }
 
 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2295,7 +2330,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 
 	if (env->ops->get_func_proto)
 		fn = env->ops->get_func_proto(func_id);
-
 	if (!fn) {
 		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
 			func_id);
@@ -2319,10 +2353,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	memset(&meta, 0, sizeof(meta));
 	meta.pkt_access = fn->pkt_access;
 
-	/* We only support one arg being in raw mode at the moment, which
-	 * is sufficient for the helper functions we have right now.
-	 */
-	err = check_raw_mode(fn);
+	err = check_func_proto(fn);
 	if (err) {
 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
 			func_id_name(func_id), func_id);
@@ -4803,7 +4834,8 @@ process_bpf_exit:
 		insn_idx++;
 	}
 
-	verbose(env, "processed %d insns, stack depth ", insn_processed);
+	verbose(env, "processed %d insns (limit %d), stack depth ",
+		insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
 	for (i = 0; i < env->subprog_cnt + 1; i++) {
 		u32 depth = env->subprog_stack_depth[i];
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f274468cbc45..fc2838ac8b78 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -245,7 +245,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
  */
 #define __BPF_TP_EMIT()	__BPF_ARG3_TP()
 #define __BPF_TP(...)							\
-	__trace_printk(1 /* Fake ip will not be printed. */,		\
+	__trace_printk(0 /* Fake ip */,					\
 		       fmt, ##__VA_ARGS__)
 
 #define __BPF_ARG1_TP(...)						\
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index f369889e521d..e3938e395cba 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6109,6 +6109,110 @@ static struct bpf_test tests[] = {
 		{ { ETH_HLEN, 42 } },
 		.fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
 	},
+	/* Checking interpreter vs JIT wrt signed extended imms. */
+	{
+		"JNE signed compare, test 1",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12),
+			BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000),
+			BPF_MOV64_REG(R2, R1),
+			BPF_ALU64_REG(BPF_AND, R2, R3),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_IMM(BPF_JNE, R2, -17104896, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
+	{
+		"JNE signed compare, test 2",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12),
+			BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000),
+			BPF_MOV64_REG(R2, R1),
+			BPF_ALU64_REG(BPF_AND, R2, R3),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_IMM(BPF_JNE, R2, 0xfefb0000, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
+	{
+		"JNE signed compare, test 3",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12),
+			BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000),
+			BPF_ALU32_IMM(BPF_MOV, R4, 0xfefb0000),
+			BPF_MOV64_REG(R2, R1),
+			BPF_ALU64_REG(BPF_AND, R2, R3),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_REG(BPF_JNE, R2, R4, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 2 } },
+	},
+	{
+		"JNE signed compare, test 4",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, -17104896),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_IMM(BPF_JNE, R1, -17104896, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 2 } },
+	},
+	{
+		"JNE signed compare, test 5",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, 0xfefb0000),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_IMM(BPF_JNE, R1, 0xfefb0000, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
+	{
+		"JNE signed compare, test 6",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, 0x7efb0000),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_IMM(BPF_JNE, R1, 0x7efb0000, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 2 } },
+	},
+	{
+		"JNE signed compare, test 7",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffff0000),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 0xfefbbc12),
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_X, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xfefb0000, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_RET | BPF_K, 2),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ { 0, 2 } },
+	},
 };
 
 static struct net_device dev;
diff --git a/net/core/filter.c b/net/core/filter.c
index 08491b3b8742..18da42a81d0c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2865,7 +2865,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
 	.arg2_type	= ARG_CONST_MAP_PTR,
 	.arg3_type	= ARG_ANYTHING,
 	.arg4_type	= ARG_PTR_TO_MEM,
-	.arg5_type	= ARG_CONST_SIZE,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
 static unsigned short bpf_tunnel_key_af(u64 flags)
@@ -3154,7 +3154,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
 	.arg2_type	= ARG_CONST_MAP_PTR,
 	.arg3_type	= ARG_ANYTHING,
 	.arg4_type	= ARG_PTR_TO_MEM,
-	.arg5_type	= ARG_CONST_SIZE,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
@@ -3460,6 +3460,8 @@ xdp_func_proto(enum bpf_func_id func_id)
 		return &bpf_xdp_event_output_proto;
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
+	case BPF_FUNC_csum_diff:
+		return &bpf_csum_diff_proto;
 	case BPF_FUNC_xdp_adjust_head:
 		return &bpf_xdp_adjust_head_proto;
 	case BPF_FUNC_xdp_adjust_meta:
@@ -4530,6 +4532,7 @@ const struct bpf_verifier_ops sk_filter_verifier_ops = {
 };
 
 const struct bpf_prog_ops sk_filter_prog_ops = {
+	.test_run		= bpf_prog_test_run_skb,
 };
 
 const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a47ad6cd41c0..f2d0462611c3 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -25,6 +25,7 @@
 
 static int zero = 0;
 static int one = 1;
+static int two __maybe_unused = 2;
 static int min_sndbuf = SOCK_MIN_SNDBUF;
 static int min_rcvbuf = SOCK_MIN_RCVBUF;
 static int max_skb_frags = MAX_SKB_FRAGS;
@@ -250,6 +251,46 @@ static int proc_do_rss_key(struct ctl_table *table, int write,
 	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
 }
 
+#ifdef CONFIG_BPF_JIT
+static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
+					   void __user *buffer, size_t *lenp,
+					   loff_t *ppos)
+{
+	int ret, jit_enable = *(int *)table->data;
+	struct ctl_table tmp = *table;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	tmp.data = &jit_enable;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret) {
+		if (jit_enable < 2 ||
+		    (jit_enable == 2 && bpf_dump_raw_ok())) {
+			*(int *)table->data = jit_enable;
+			if (jit_enable == 2)
+				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
+		} else {
+			ret = -EPERM;
+		}
+	}
+	return ret;
+}
+
+# ifdef CONFIG_HAVE_EBPF_JIT
+static int
+proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
+				    void __user *buffer, size_t *lenp,
+				    loff_t *ppos)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+# endif
+#endif
+
 static struct ctl_table net_core_table[] = {
 #ifdef CONFIG_NET
 	{
@@ -325,13 +366,14 @@ static struct ctl_table net_core_table[] = {
 		.data		= &bpf_jit_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-		.proc_handler	= proc_dointvec
-#else
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dointvec_minmax_bpf_enable,
+# ifdef CONFIG_BPF_JIT_ALWAYS_ON
 		.extra1		= &one,
 		.extra2		= &one,
-#endif
+# else
+		.extra1		= &zero,
+		.extra2		= &two,
+# endif
 	},
 # ifdef CONFIG_HAVE_EBPF_JIT
 	{
@@ -339,14 +381,18 @@ static struct ctl_table net_core_table[] = {
 		.data		= &bpf_jit_harden,
 		.maxlen		= sizeof(int),
 		.mode		= 0600,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
+		.extra1		= &zero,
+		.extra2		= &two,
 	},
 	{
 		.procname	= "bpf_jit_kallsyms",
 		.data		= &bpf_jit_kallsyms,
 		.maxlen		= sizeof(int),
 		.mode		= 0600,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
+		.extra1		= &zero,
+		.extra2		= &one,
 	},
 # endif
 #endif
diff --git a/net/socket.c b/net/socket.c
index fbfae1ed3ff5..1536515b6437 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2613,15 +2613,6 @@ out_fs:
 
 core_initcall(sock_init);	/* early initcall */
 
-static int __init jit_init(void)
-{
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
-	bpf_jit_enable = 1;
-#endif
-	return 0;
-}
-pure_initcall(jit_init);
-
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c
index 12e1024069c2..0c12048ac79f 100644
--- a/samples/bpf/xdp2skb_meta_kern.c
+++ b/samples/bpf/xdp2skb_meta_kern.c
@@ -35,15 +35,17 @@ int _xdp_mark(struct xdp_md *ctx)
 	void *data, *data_end;
 	int ret;
 
-	/* Reserve space in-front data pointer for our meta info.
+	/* Reserve space in-front of data pointer for our meta info.
 	 * (Notice drivers not supporting data_meta will fail here!)
 	 */
 	ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
 	if (ret < 0)
 		return XDP_ABORTED;
 
-	/* For some unknown reason, these ctx pointers must be read
-	 * after bpf_xdp_adjust_meta, else verifier will reject prog.
+	/* Notice: Kernel-side verifier requires that loading of
+	 * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(),
+	 * as pkt-data pointers are invalidated.  Helpers that require
+	 * this are determined/marked by bpf_helper_changes_pkt_data()
 	 */
 	data = (void *)(unsigned long)ctx->data;
 
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index c969141bfa8b..211db8ded0de 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -1,6 +1,7 @@
-/* XDP monitor tool, based on tracepoints
+/* SPDX-License-Identifier: GPL-2.0
+ *  Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
  *
- *  Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * XDP monitor tool, based on tracepoints
  */
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
@@ -118,3 +119,92 @@ int trace_xdp_exception(struct xdp_exception_ctx *ctx)
 
 	return 0;
 }
+
+/* Common stats data record shared with _user.c */
+struct datarec {
+	u64 processed;
+	u64 dropped;
+	u64 info;
+};
+#define MAX_CPUS 64
+
+struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
+	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size	= sizeof(u32),
+	.value_size	= sizeof(struct datarec),
+	.max_entries	= MAX_CPUS,
+};
+
+struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
+	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size	= sizeof(u32),
+	.value_size	= sizeof(struct datarec),
+	.max_entries	= 1,
+};
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
+ * Code in:         kernel/include/trace/events/xdp.h
+ */
+struct cpumap_enqueue_ctx {
+	u64 __pad;		// First 8 bytes are not accessible by bpf code
+	int map_id;		//	offset:8;  size:4; signed:1;
+	u32 act;		//	offset:12; size:4; signed:0;
+	int cpu;		//	offset:16; size:4; signed:1;
+	unsigned int drops;	//	offset:20; size:4; signed:0;
+	unsigned int processed;	//	offset:24; size:4; signed:0;
+	int to_cpu;		//	offset:28; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_enqueue")
+int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
+{
+	u32 to_cpu = ctx->to_cpu;
+	struct datarec *rec;
+
+	if (to_cpu >= MAX_CPUS)
+		return 1;
+
+	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
+	if (!rec)
+		return 0;
+	rec->processed += ctx->processed;
+	rec->dropped   += ctx->drops;
+
+	/* Record bulk events, then userspace can calc average bulk size */
+	if (ctx->processed > 0)
+		rec->info += 1;
+
+	return 0;
+}
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
+ * Code in:         kernel/include/trace/events/xdp.h
+ */
+struct cpumap_kthread_ctx {
+	u64 __pad;		// First 8 bytes are not accessible by bpf code
+	int map_id;		//	offset:8;  size:4; signed:1;
+	u32 act;		//	offset:12; size:4; signed:0;
+	int cpu;		//	offset:16; size:4; signed:1;
+	unsigned int drops;	//	offset:20; size:4; signed:0;
+	unsigned int processed;	//	offset:24; size:4; signed:0;
+	int sched;		//	offset:28; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_kthread")
+int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
+{
+	struct datarec *rec;
+	u32 key = 0;
+
+	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
+	if (!rec)
+		return 0;
+	rec->processed += ctx->processed;
+	rec->dropped   += ctx->drops;
+
+	/* Count times kthread yielded CPU via schedule call */
+	if (ctx->sched)
+		rec->info++;
+
+	return 0;
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index eaba165b3549..eec14520d513 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -1,4 +1,5 @@
-/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
  */
 static const char *__doc__=
  "XDP monitor tool, based on tracepoints\n"
@@ -40,6 +41,9 @@ static const struct option long_options[] = {
 	{0, 0, NULL,  0 }
 };
 
+/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
+#define EXIT_FAIL_MEM	5
+
 static void usage(char *argv[])
 {
 	int i;
@@ -108,23 +112,93 @@ static const char *action2str(int action)
 	return NULL;
 }
 
+/* Common stats data record shared with _kern.c */
+struct datarec {
+	__u64 processed;
+	__u64 dropped;
+	__u64 info;
+};
+#define MAX_CPUS 64
+
+/* Userspace structs for collection of stats from maps */
 struct record {
-	__u64 counter;
 	__u64 timestamp;
+	struct datarec total;
+	struct datarec *cpu;
+};
+struct u64rec {
+	__u64 processed;
+};
+struct record_u64 {
+	/* record for _kern side __u64 values */
+	__u64 timestamp;
+	struct u64rec total;
+	struct u64rec *cpu;
 };
 
 struct stats_record {
-	struct record xdp_redir[REDIR_RES_MAX];
-	struct record xdp_exception[XDP_ACTION_MAX];
+	struct record_u64 xdp_redirect[REDIR_RES_MAX];
+	struct record_u64 xdp_exception[XDP_ACTION_MAX];
+	struct record xdp_cpumap_kthread;
+	struct record xdp_cpumap_enqueue[MAX_CPUS];
 };
 
-static void stats_print_headers(bool err_only)
+static bool map_collect_record(int fd, __u32 key, struct record *rec)
 {
-	if (err_only)
-		printf("\n%s\n", __doc_err_only__);
+	/* For percpu maps, userspace gets a value per possible CPU */
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct datarec values[nr_cpus];
+	__u64 sum_processed = 0;
+	__u64 sum_dropped = 0;
+	__u64 sum_info = 0;
+	int i;
+
+	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+		fprintf(stderr,
+			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+		return false;
+	}
+	/* Get time as close as possible to reading map contents */
+	rec->timestamp = gettime();
 
-	printf("%-14s %-11s %-10s %-18s %-9s\n",
-	       "ACTION", "result", "pps ", "pps-human-readable", "measure-period");
+	/* Record and sum values from each CPU */
+	for (i = 0; i < nr_cpus; i++) {
+		rec->cpu[i].processed = values[i].processed;
+		sum_processed        += values[i].processed;
+		rec->cpu[i].dropped = values[i].dropped;
+		sum_dropped        += values[i].dropped;
+		rec->cpu[i].info = values[i].info;
+		sum_info        += values[i].info;
+	}
+	rec->total.processed = sum_processed;
+	rec->total.dropped   = sum_dropped;
+	rec->total.info      = sum_info;
+	return true;
+}
+
+static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
+{
+	/* For percpu maps, userspace gets a value per possible CPU */
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct u64rec values[nr_cpus];
+	__u64 sum_total = 0;
+	int i;
+
+	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+		fprintf(stderr,
+			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+		return false;
+	}
+	/* Get time as close as possible to reading map contents */
+	rec->timestamp = gettime();
+
+	/* Record and sum values from each CPU */
+	for (i = 0; i < nr_cpus; i++) {
+		rec->cpu[i].processed = values[i].processed;
+		sum_total            += values[i].processed;
+	}
+	rec->total.processed = sum_total;
+	return true;
 }
 
 static double calc_period(struct record *r, struct record *p)
@@ -139,77 +213,203 @@ static double calc_period(struct record *r, struct record *p)
 	return period_;
 }
 
-static double calc_pps(struct record *r, struct record *p, double period)
+static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
+{
+	double period_ = 0;
+	__u64 period = 0;
+
+	period = r->timestamp - p->timestamp;
+	if (period > 0)
+		period_ = ((double) period / NANOSEC_PER_SEC);
+
+	return period_;
+}
+
+static double calc_pps(struct datarec *r, struct datarec *p, double period)
+{
+	__u64 packets = 0;
+	double pps = 0;
+
+	if (period > 0) {
+		packets = r->processed - p->processed;
+		pps = packets / period;
+	}
+	return pps;
+}
+
+static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
+{
+	__u64 packets = 0;
+	double pps = 0;
+
+	if (period > 0) {
+		packets = r->processed - p->processed;
+		pps = packets / period;
+	}
+	return pps;
+}
+
+static double calc_drop(struct datarec *r, struct datarec *p, double period)
+{
+	__u64 packets = 0;
+	double pps = 0;
+
+	if (period > 0) {
+		packets = r->dropped - p->dropped;
+		pps = packets / period;
+	}
+	return pps;
+}
+
+static double calc_info(struct datarec *r, struct datarec *p, double period)
 {
 	__u64 packets = 0;
 	double pps = 0;
 
 	if (period > 0) {
-		packets = r->counter - p->counter;
+		packets = r->info - p->info;
 		pps = packets / period;
 	}
 	return pps;
 }
 
-static void stats_print(struct stats_record *rec,
-			struct stats_record *prev,
+static void stats_print(struct stats_record *stats_rec,
+			struct stats_record *stats_prev,
 			bool err_only)
 {
-	double period = 0, pps = 0;
-	struct record *r, *p;
-	int i = 0;
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	int rec_i = 0, i, to_cpu;
+	double t = 0, pps = 0;
 
-	char *fmt = "%-14s %-11s %-10.0f %'-18.0f %f\n";
+	/* Column header for the per-event rows printed below */
+	printf("%-15s %-7s %-12s %-12s %-9s\n",
+	       "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
 
 	/* tracepoint: xdp:xdp_redirect_* */
 	if (err_only)
-		i = REDIR_ERROR;
-
-	for (; i < REDIR_RES_MAX; i++) {
-		r = &rec->xdp_redir[i];
-		p = &prev->xdp_redir[i];
-
-		if (p->timestamp) {
-			period = calc_period(r, p);
-			pps = calc_pps(r, p, period);
+		rec_i = REDIR_ERROR;
+
+	for (; rec_i < REDIR_RES_MAX; rec_i++) {
+		struct record_u64 *rec, *prev;
+		char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
+		char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
+
+		rec  =  &stats_rec->xdp_redirect[rec_i];
+		prev = &stats_prev->xdp_redirect[rec_i];
+		t = calc_period_u64(rec, prev);
+
+		for (i = 0; i < nr_cpus; i++) {
+			struct u64rec *r = &rec->cpu[i];
+			struct u64rec *p = &prev->cpu[i];
+
+			pps = calc_pps_u64(r, p, t);
+			if (pps > 0)
+				printf(fmt1, "XDP_REDIRECT", i,
+				       rec_i ? 0.0: pps, rec_i ? pps : 0.0,
+				       err2str(rec_i));
 		}
-		printf(fmt, "XDP_REDIRECT", err2str(i), pps, pps, period);
+		pps = calc_pps_u64(&rec->total, &prev->total, t);
+		printf(fmt2, "XDP_REDIRECT", "total",
+		       rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
 	}
 
 	/* tracepoint: xdp:xdp_exception */
-	for (i = 0; i < XDP_ACTION_MAX; i++) {
-		r = &rec->xdp_exception[i];
-		p = &prev->xdp_exception[i];
-		if (p->timestamp) {
-			period = calc_period(r, p);
-			pps = calc_pps(r, p, period);
+	for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+		struct record_u64 *rec, *prev;
+		char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
+		char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
+
+		rec  =  &stats_rec->xdp_exception[rec_i];
+		prev = &stats_prev->xdp_exception[rec_i];
+		t = calc_period_u64(rec, prev);
+
+		for (i = 0; i < nr_cpus; i++) {
+			/* rec_i is an XDP action index in this loop: label
+			 * per-CPU rows with action2str(), like "total" below
+			 */
+			pps = calc_pps_u64(&rec->cpu[i], &prev->cpu[i], t);
+			if (pps > 0)
+				printf(fmt1, "Exception", i,
+				       0.0, pps, action2str(rec_i));
 		}
+		pps = calc_pps_u64(&rec->total, &prev->total, t);
 		if (pps > 0)
-			printf(fmt, action2str(i), "Exception",
-			       pps, pps, period);
+			printf(fmt2, "Exception", "total",
+			       0.0, pps, action2str(rec_i));
 	}
-	printf("\n");
-}
 
-static __u64 get_key32_value64_percpu(int fd, __u32 key)
-{
-	/* For percpu maps, userspace gets a value per possible CPU */
-	unsigned int nr_cpus = bpf_num_possible_cpus();
-	__u64 values[nr_cpus];
-	__u64 sum = 0;
-	int i;
-
-	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
-		fprintf(stderr,
-			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
-		return 0;
+	/* cpumap enqueue stats */
+	for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
+		char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
+		char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
+		struct record *rec, *prev;
+		char *info_str = "";
+		double drop, info;
+
+		rec  =  &stats_rec->xdp_cpumap_enqueue[to_cpu];
+		prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
+		t = calc_period(rec, prev);
+		for (i = 0; i < nr_cpus; i++) {
+			struct datarec *r = &rec->cpu[i];
+			struct datarec *p = &prev->cpu[i];
+
+			pps  = calc_pps(r, p, t);
+			drop = calc_drop(r, p, t);
+			info = calc_info(r, p, t);
+			if (info > 0) {
+				info_str = "bulk-average";
+				info = pps / info; /* calc average bulk size */
+			}
+			if (pps > 0)
+				printf(fmt1, "cpumap-enqueue",
+				       i, to_cpu, pps, drop, info, info_str);
+		}
+		pps = calc_pps(&rec->total, &prev->total, t);
+		if (pps > 0) {
+			drop = calc_drop(&rec->total, &prev->total, t);
+			info = calc_info(&rec->total, &prev->total, t);
+			if (info > 0) {
+				info_str = "bulk-average";
+				info = pps / info; /* calc average bulk size */
+			}
+			printf(fmt2, "cpumap-enqueue",
+			       "sum", to_cpu, pps, drop, info, info_str);
+		}
 	}
 
-	/* Sum values from each CPU */
-	for (i = 0; i < nr_cpus; i++) {
-		sum += values[i];
+	/* cpumap kthread stats */
+	{
+		char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
+		char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
+		struct record *rec, *prev;
+		double drop, info;
+		char *i_str = "";
+
+		rec  =  &stats_rec->xdp_cpumap_kthread;
+		prev = &stats_prev->xdp_cpumap_kthread;
+		t = calc_period(rec, prev);
+		for (i = 0; i < nr_cpus; i++) {
+			struct datarec *r = &rec->cpu[i];
+			struct datarec *p = &prev->cpu[i];
+
+			pps  = calc_pps(r, p, t);
+			drop = calc_drop(r, p, t);
+			info = calc_info(r, p, t);
+			if (info > 0)
+				i_str = "sched";
+			if (pps > 0)
+				printf(fmt1, "cpumap-kthread",
+				       i, pps, drop, info, i_str);
+		}
+		pps = calc_pps(&rec->total, &prev->total, t);
+		drop = calc_drop(&rec->total, &prev->total, t);
+		info = calc_info(&rec->total, &prev->total, t);
+		if (info > 0)
+			i_str = "sched-sum";
+		printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
 	}
-	return sum;
+
+	printf("\n");
 }
 
 static bool stats_collect(struct stats_record *rec)
@@ -222,25 +422,109 @@ static bool stats_collect(struct stats_record *rec)
 	 */
 
 	fd = map_data[0].fd; /* map0: redirect_err_cnt */
-	for (i = 0; i < REDIR_RES_MAX; i++) {
-		rec->xdp_redir[i].timestamp = gettime();
-		rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i);
-	}
+	for (i = 0; i < REDIR_RES_MAX; i++)
+		map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
 
 	fd = map_data[1].fd; /* map1: exception_cnt */
 	for (i = 0; i < XDP_ACTION_MAX; i++) {
-		rec->xdp_exception[i].timestamp = gettime();
-		rec->xdp_exception[i].counter = get_key32_value64_percpu(fd, i);
+		map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
 	}
 
+	fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
+	for (i = 0; i < MAX_CPUS; i++)
+		map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
+
+	fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
+	map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
+
 	return true;
 }
 
+/* Return a zero-filled array with one record_size-byte record per possible
+ * CPU. calloc() zeroes the memory, so NULL is checked before any write.
+ */
+static void *alloc_rec_per_cpu(int record_size)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	void *array;
+
+	array = calloc(nr_cpus, record_size);
+	if (!array) {
+		fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
+		exit(EXIT_FAIL_MEM);
+	}
+	return array;
+}
+
+/* Allocate a zero-filled stats_record plus the per-CPU array of every record
+ * in it. Exits on allocation failure; release with free_stats_record().
+ */
+static struct stats_record *alloc_stats_record(void)
+{
+	struct stats_record *rec;
+	int rec_sz, i;
+
+	rec = calloc(1, sizeof(*rec));
+	if (!rec) {
+		fprintf(stderr, "Mem alloc error\n");
+		exit(EXIT_FAIL_MEM);
+	}
+
+	/* Alloc stats stored per CPU for each record */
+	rec_sz = sizeof(struct u64rec);
+	for (i = 0; i < REDIR_RES_MAX; i++)
+		rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+	for (i = 0; i < XDP_ACTION_MAX; i++)
+		rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+	rec_sz = sizeof(struct datarec);
+	rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
+
+	for (i = 0; i < MAX_CPUS; i++)
+		rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+	return rec;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+	int i;
+
+	for (i = 0; i < REDIR_RES_MAX; i++)
+		free(r->xdp_redirect[i].cpu);
+
+	for (i = 0; i < XDP_ACTION_MAX; i++)
+		free(r->xdp_exception[i].cpu);
+
+	free(r->xdp_cpumap_kthread.cpu);
+
+	for (i = 0; i < MAX_CPUS; i++)
+		free(r->xdp_cpumap_enqueue[i].cpu);
+
+	free(r);
+}
+
+/* Pointer swap trick */
+static inline void swap(struct stats_record **a, struct stats_record **b)
+{
+	struct stats_record *tmp;
+
+	tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
 static void stats_poll(int interval, bool err_only)
 {
-	struct stats_record rec, prev;
+	struct stats_record *rec, *prev;
 
-	memset(&rec, 0, sizeof(rec));
+	rec  = alloc_stats_record();
+	prev = alloc_stats_record();
+	stats_collect(rec);
+
+	if (err_only)
+		printf("\n%s\n", __doc_err_only__);
 
 	/* Trick to pretty printf with thousands separators use %' */
 	setlocale(LC_NUMERIC, "en_US");
@@ -258,13 +542,15 @@ static void stats_poll(int interval, bool err_only)
 	fflush(stdout);
 
 	while (1) {
-		memcpy(&prev, &rec, sizeof(rec));
-		stats_collect(&rec);
-		stats_print_headers(err_only);
-		stats_print(&rec, &prev, err_only);
+		swap(&prev, &rec);
+		stats_collect(rec);
+		stats_print(rec, prev, err_only);
 		fflush(stdout);
 		sleep(interval);
 	}
+
+	free_stats_record(rec);
+	free_stats_record(prev);
 }
 
 static void print_bpf_prog_info(void)
diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
index 30044bc4f389..58c2bab4ef6e 100644
--- a/tools/bpf/bpf_jit_disasm.c
+++ b/tools/bpf/bpf_jit_disasm.c
@@ -172,7 +172,8 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
 {
 	char *ptr, *pptr, *tmp;
 	off_t off = 0;
-	int ret, flen, proglen, pass, ulen = 0;
+	unsigned int proglen;
+	int ret, flen, pass, ulen = 0;
 	regmatch_t pmatch[1];
 	unsigned long base;
 	regex_t regex;
@@ -199,7 +200,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
 	}
 
 	ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so);
-	ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx",
+	ret = sscanf(ptr, "flen=%d proglen=%u pass=%d image=%lx",
 		     &flen, &proglen, &pass, &base);
 	if (ret != 4) {
 		regfree(&regex);
@@ -239,7 +240,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
 	}
 
 	assert(ulen == proglen);
-	printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
+	printf("%u bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
 	       proglen, pass, flen);
 	printf("%lx + <x>:\n", base);
 
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 6601c95a9258..0b482c0070e0 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -34,6 +34,7 @@
 /* Author: Jakub Kicinski <kubakici@wp.pl> */
 
 #include <errno.h>
+#include <fcntl.h>
 #include <fts.h>
 #include <libgen.h>
 #include <mntent.h>
@@ -433,6 +434,77 @@ ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
 	return if_indextoname(ifindex, buf);
 }
 
+/* Parse the integer held in the file at @path; returns -1 on failure. */
+static int read_sysfs_hex_int(char *path)
+{
+	char buf[8];
+	int fd, len, err;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		p_err("Can't open %s: %s", path, strerror(errno));
+		return -1;
+	}
+
+	len = read(fd, buf, sizeof(buf));
+	err = errno;	/* close() below may clobber read()'s errno */
+	close(fd);
+	if (len < 0) {
+		p_err("Can't read %s: %s", path, strerror(err));
+		return -1;
+	}
+	if (len >= (int)sizeof(buf)) {
+		p_err("Value in %s too long", path);
+		return -1;
+	}
+
+	buf[len] = 0;	/* read() does not NUL-terminate */
+	return strtol(buf, NULL, 0);	/* base 0: a "0x" prefix selects hex */
+}
+
+/* Return the integer in /sys/class/net/<devname>/device/<entry_name>. */
+static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name)
+{
+	char sysfs_path[64];
+
+	snprintf(sysfs_path, sizeof(sysfs_path),
+		 "/sys/class/net/%s/device/%s", devname, entry_name);
+	return read_sysfs_hex_int(sysfs_path);
+}
+
+/* Map a netdev (ifindex within netns ns_dev/ns_ino) to a bfd arch name. */
+const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino)
+{
+	char devname[IF_NAMESIZE];
+	int vendor_id, device_id;
+
+	if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) {
+		p_err("Can't get net device name for ifindex %d: %s", ifindex,
+		      strerror(errno));
+		return NULL;
+	}
+
+	vendor_id = read_sysfs_netdev_hex_int(devname, "vendor");
+	if (vendor_id < 0) {
+		p_err("Can't get device vendor id for %s", devname);
+		return NULL;
+	}
+
+	/* 0x19ee is the only vendor ID with a known bfd arch here. */
+	if (vendor_id != 0x19ee) {
+		p_err("Can't get bfd arch name for device vendor id 0x%04x",
+		      vendor_id);
+		return NULL;
+	}
+
+	device_id = read_sysfs_netdev_hex_int(devname, "device");
+	if (device_id != 0x4000 &&
+	    device_id != 0x6000 &&
+	    device_id != 0x6003)
+		p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch");
+	return "NFP-6xxx";
+}
+
 void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
 {
 	char name[IF_NAMESIZE];
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 57d32e8a1391..87439320ef70 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -76,7 +76,8 @@ static int fprintf_json(void *out, const char *fmt, ...)
 	return 0;
 }
 
-void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+		       const char *arch)
 {
 	disassembler_ftype disassemble;
 	struct disassemble_info info;
@@ -100,6 +101,19 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
 	else
 		init_disassemble_info(&info, stdout,
 				      (fprintf_ftype) fprintf);
+
+	/* Offloaded programs: switch the disassembler to the device's arch. */
+	if (arch) {
+		const bfd_arch_info_type *inf = bfd_scan_arch(arch);
+
+		if (inf) {
+			bfdf->arch_info = inf;
+		} else {
+			p_err("No libbfd support for %s", arch);
+			return;
+		}
+	}
+
 	info.arch = bfd_get_arch(bfdf);
 	info.mach = bfd_get_mach(bfdf);
 	info.buffer = image;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 65b526fe6e7e..b8e9584d6246 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -121,7 +121,10 @@ int do_cgroup(int argc, char **arg);
 
 int prog_parse_fd(int *argc, char ***argv);
 
-void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes);
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+		       const char *arch);
 void print_hex_data_json(uint8_t *data, size_t len);
 
+const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
+
 #endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 8d7db9d6b9cd..f95fa67bb498 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -66,6 +66,7 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_HASH_OF_MAPS]	= "hash_of_maps",
 	[BPF_MAP_TYPE_DEVMAP]		= "devmap",
 	[BPF_MAP_TYPE_SOCKMAP]		= "sockmap",
+	[BPF_MAP_TYPE_CPUMAP]		= "cpumap",
 };
 
 static unsigned int get_possible_cpus(void)
@@ -428,6 +429,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
 
 	jsonw_name(json_wtr, "flags");
 	jsonw_printf(json_wtr, "%#x", info->map_flags);
+
+	print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
+
 	jsonw_uint_field(json_wtr, "bytes_key", info->key_size);
 	jsonw_uint_field(json_wtr, "bytes_value", info->value_size);
 	jsonw_uint_field(json_wtr, "max_entries", info->max_entries);
@@ -469,7 +473,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 	if (*info->name)
 		printf("name %s  ", info->name);
 
-	printf("flags 0x%x\n", info->map_flags);
+	printf("flags 0x%x", info->map_flags);
+	print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
+	printf("\n");
 	printf("\tkey %uB  value %uB  max_entries %u",
 	       info->key_size, info->value_size, info->max_entries);
 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 099e21cf1b5c..e8e2baaf93c2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -776,7 +776,17 @@ static int do_dump(int argc, char **argv)
 		}
 	} else {
 		if (member_len == &info.jited_prog_len) {
-			disasm_print_insn(buf, *member_len, opcodes);
+			const char *name = NULL;
+
+			if (info.ifindex) {
+				name = ifindex_to_bfd_name_ns(info.ifindex,
+							      info.netns_dev,
+							      info.netns_ino);
+				if (!name)
+					goto err_free;
+			}
+
+			disasm_print_insn(buf, *member_len, opcodes, name);
 		} else {
 			kernel_syms_load(&dd);
 			if (json_output)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 69f96af4a569..af1f49ad8b88 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -900,6 +900,9 @@ struct xdp_md {
 	__u32 data;
 	__u32 data_end;
 	__u32 data_meta;
+	/* Below access go through struct xdp_rxq_info */
+	__u32 ingress_ifindex; /* rxq->dev->ifindex */
+	__u32 rx_queue_index;  /* rxq->queue_index  */
 };
 
 enum sk_action {
@@ -935,6 +938,9 @@ struct bpf_map_info {
 	__u32 max_entries;
 	__u32 map_flags;
 	char  name[BPF_OBJ_NAME_LEN];
+	__u32 ifindex;
+	__u64 netns_dev;
+	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
 /* User bpf_sock_ops struct to access socket values and specify request ops
@@ -956,6 +962,12 @@ struct bpf_sock_ops {
 	__u32 local_ip6[4];	/* Stored in network byte order */
 	__u32 remote_port;	/* Stored in network byte order */
 	__u32 local_port;	/* stored in host byte order */
+	__u32 is_fullsock;	/* Some TCP fields are only valid if
+				 * there is a full socket. If not, the
+				 * fields read as zero.
+				 */
+	__u32 snd_cwnd;
+	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
 };
 
 /* List of known BPF sock_ops operators.
@@ -1010,7 +1022,8 @@ struct bpf_perf_event_value {
 #define BPF_DEVCG_DEV_CHAR	(1ULL << 1)
 
 struct bpf_cgroup_dev_ctx {
-	__u32 access_type; /* (access << 16) | type */
+	/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+	__u32 access_type;
 	__u32 major;
 	__u32 minor;
 };
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 541d9d7fad5a..1e09d77f1948 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -3,3 +3,10 @@ test_maps
 test_lru_map
 test_lpm_map
 test_tag
+FEATURE-DUMP.libbpf
+fixdep
+test_align
+test_dev_cgroup
+test_progs
+test_verifier_log
+feature
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index a8aa7e251c8e..3a44b655d852 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -19,7 +19,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
-	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o
+	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
+	sample_map_ret0.o
 
 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
 	test_offload.py
diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c
new file mode 100644
index 000000000000..0756303676ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_map_ret0.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") htab = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(long),
+	.max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") array = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(long),
+	.max_entries = 2,
+};
+
+/* Sample program which should always load for testing control paths. */
+SEC(".text") int func()
+{
+	__u64 key64 = 0;	/* NOTE(review): 8 bytes vs array's 4-byte key_size - confirm */
+	__u32 key = 0;
+	long *value;
+
+	value = bpf_map_lookup_elem(&htab, &key);	/* look up key 0 in the hash map */
+	if (!value)
+		return 1;	/* 1: entry missing from htab */
+	value = bpf_map_lookup_elem(&array, &key64);	/* then in the array map */
+	if (!value)
+		return 1;	/* 1: entry missing from array */
+
+	return 0;	/* both lookups returned a value */
+}
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index f61480641b6e..081510853c6d 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -521,6 +521,126 @@ static void test_lpm_delete(void)
 	close(map_fd);
 }
 
+static void test_lpm_get_next_key(void)
+{
+	struct bpf_lpm_trie_key *key_p, *next_key_p;
+	size_t key_size;
+	__u32 value = 0;
+	int map_fd;
+
+	key_size = sizeof(*key_p) + sizeof(__u32);	/* key header + 4 data bytes */
+	key_p = alloca(key_size);
+	next_key_p = alloca(key_size);
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value),
+				100, BPF_F_NO_PREALLOC);
+	assert(map_fd >= 0);
+
+	/* Empty trie: get_next_key must fail with ENOENT. */
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* Insert one key, fetch and verify it; it must have no successor. */
+	key_p->prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168);
+
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* NOTE(review): key is NULL below, so the prefixlen=8 key_p is output only. */
+	key_p->prefixlen = 8;
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168);
+
+	/* Add 192.168.0.0/24 (two keys); expect the /24 first, then the /16. */
+	key_p->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168 && key_p->data[2] == 0);
+
+	memset(next_key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* Add 192.168.128.0/24 (three keys); expect .0/24, .128/24, then /16. */
+	key_p->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168 && key_p->data[2] == 0);
+
+	memset(next_key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* Add 192.168.1.0/24 (four keys); expect .0, .1, .128 (/24), then /16. */
+	key_p->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168 && key_p->data[2] == 0);
+
+	memset(next_key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* A key with no exact match yields the first key in post order. */
+	key_p->prefixlen = 22;
+	inet_pton(AF_INET, "192.168.1.0", key_p->data);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 0);
+
+	close(map_fd);
+}
+
 int main(void)
 {
 	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
@@ -545,6 +665,8 @@ int main(void)
 
 	test_lpm_delete();
 
+	test_lpm_get_next_key();
+
 	printf("test_lpm: OK\n");
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index e3c750f17cb8..833b9c1ec450 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -20,6 +20,7 @@ import os
 import pprint
 import random
 import string
+import struct
 import subprocess
 import time
 
@@ -156,6 +157,14 @@ def bpftool_prog_list(expected=None, ns=""):
                  (len(progs), expected))
     return progs
 
+def bpftool_map_list(expected=None, ns=""):
+    """List loaded BPF maps; fail the test if the count is not `expected`."""
+    _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+    if expected is not None and len(maps) != expected:
+        fail(True, "%d BPF maps loaded, expected %d" %
+             (len(maps), expected))
+    return maps
+
 def bpftool_prog_list_wait(expected=0, n_retry=20):
     for i in range(n_retry):
         nprogs = len(bpftool_prog_list())
@@ -164,6 +173,14 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
         time.sleep(0.05)
     raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
 
+def bpftool_map_list_wait(expected=0, n_retry=20):
+    for i in range(n_retry):  # poll up to n_retry times
+        nmaps = len(bpftool_map_list())
+        if nmaps == expected:
+            return  # map count matches `expected`
+        time.sleep(0.05)  # pause 50 ms before the next poll
+    raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
+
 def ip(args, force=False, JSON=True, ns="", fail=True):
     if force:
         args = "-force " + args
@@ -193,6 +210,26 @@ def mknetns(n_retry=10):
             return name
     return None
 
+def int2str(fmt, val):
+    """Pack `val` per struct format `fmt`; return its bytes as decimal words."""
+    packed = struct.pack(fmt, val)
+    byte_vals = [int(b) for b in packed]
+    return " ".join(str(v) for v in byte_vals)
+
+def str2int(strtab):
+    """
+    Decode a list of hex byte strings (as printed by bpftool) into an int.
+    Only 4-byte ("I") and 8-byte ("Q") values in native byte order are
+    supported.
+    """
+    raw = bytearray([int(s, 16) for s in strtab])
+    nbytes = len(strtab)
+    fmt = {4: "I", 8: "Q"}.get(nbytes)
+    if fmt is None:
+        raise Exception("String array of len %d can't be unpacked to an int" %
+                        (nbytes))
+    return struct.unpack(fmt, raw)[0]
+
 class DebugfsDir:
     """
     Class for accessing DebugFS directories as a dictionary.
@@ -311,13 +348,13 @@ class NetdevSim:
         return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
                   fail=fail)
 
-    def set_xdp(self, bpf, mode, force=False, fail=True):
+    def set_xdp(self, bpf, mode, force=False, JSON=True, fail=True):
         return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
-                  force=force, fail=fail)
+                  force=force, JSON=JSON, fail=fail)
 
-    def unset_xdp(self, mode, force=False, fail=True):
+    def unset_xdp(self, mode, force=False, JSON=True, fail=True):
         return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
-                  force=force, fail=fail)
+                  force=force, JSON=JSON, fail=fail)
 
     def ip_link_show(self, xdp):
         _, link = ip("link show dev %s" % (self['ifname']))
@@ -390,12 +427,16 @@ class NetdevSim:
 
 ################################################################################
 def clean_up():
+    global files, netns, devs
+
     for dev in devs:
         dev.remove()
     for f in files:
         cmd("rm -f %s" % (f))
     for ns in netns:
         cmd("ip netns delete %s" % (ns))
+    files = []
+    netns = []
 
 def pin_prog(file_name, idx=0):
     progs = bpftool_prog_list(expected=(idx + 1))
@@ -405,16 +446,31 @@ def pin_prog(file_name, idx=0):
 
     return file_name, bpf_pinned(file_name)
 
-def check_dev_info(other_ns, ns, pin_file=None, removed=False):
-    if removed:
-        bpftool_prog_list(expected=0)
-        ret, err = bpftool("prog show pin %s" % (pin_file), fail=False)
-        fail(ret == 0, "Showing prog with removed device did not fail")
-        fail(err["error"].find("No such device") == -1,
-             "Showing prog with removed device expected ENODEV, error is %s" %
-             (err["error"]))
-        return
-    progs = bpftool_prog_list(expected=int(not removed), ns=ns)
+def pin_map(file_name, idx=0, expected=1):
+    """Pin the idx-th listed map at file_name and record it for clean-up."""
+    map_id = bpftool_map_list(expected=expected)[idx]["id"]
+    bpftool("map pin id %d %s" % (map_id, file_name))
+    files.append(file_name)
+
+    return file_name, bpf_pinned(file_name)
+
+def check_dev_info_removed(prog_file=None, map_file=None):
+    bpftool_prog_list(expected=0)  # no program may remain listed
+    ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
+    fail(ret == 0, "Showing prog with removed device did not fail")
+    fail(err["error"].find("No such device") == -1,  # error text must say ENODEV
+         "Showing prog with removed device expected ENODEV, error is %s" %
+         (err["error"]))
+
+    bpftool_map_list(expected=0)  # same two checks for the pinned map
+    ret, err = bpftool("map show pin %s" % (map_file), fail=False)
+    fail(ret == 0, "Showing map with removed device did not fail")
+    fail(err["error"].find("No such device") == -1,  # error text must say ENODEV
+         "Showing map with removed device expected ENODEV, error is %s" %
+         (err["error"]))
+
+def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
+    progs = bpftool_prog_list(expected=1, ns=ns)
     prog = progs[0]
 
     fail("dev" not in prog.keys(), "Device parameters not reported")
@@ -423,16 +479,17 @@ def check_dev_info(other_ns, ns, pin_file=None, removed=False):
     fail("ns_dev" not in dev.keys(), "Device parameters not reported")
     fail("ns_inode" not in dev.keys(), "Device parameters not reported")
 
-    if not removed and not other_ns:
+    if not other_ns:
         fail("ifname" not in dev.keys(), "Ifname not reported")
         fail(dev["ifname"] != sim["ifname"],
              "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
     else:
         fail("ifname" in dev.keys(), "Ifname is reported for other ns")
-        if removed:
-            fail(dev["ifindex"] != 0, "Device perameters not zero on removed")
-            fail(dev["ns_dev"] != 0, "Device perameters not zero on removed")
-            fail(dev["ns_inode"] != 0, "Device perameters not zero on removed")
+
+    maps = bpftool_map_list(expected=2, ns=ns)
+    for m in maps:
+        fail("dev" not in m.keys(), "Device parameters not reported")
+        fail(dev != m["dev"], "Map's device different than program's")
 
 # Parse command line
 parser = argparse.ArgumentParser()
@@ -464,7 +521,7 @@ if out.find("/sys/kernel/debug type debugfs") == -1:
     cmd("mount -t debugfs none /sys/kernel/debug")
 
 # Check samples are compiled
-samples = ["sample_ret0.o"]
+samples = ["sample_ret0.o", "sample_map_ret0.o"]
 for s in samples:
     ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
     skip(ret != 0, "sample %s/%s not found, please compile it" %
@@ -739,8 +796,9 @@ try:
     bpftool_prog_list_wait(expected=0)
 
     sim = NetdevSim()
-    sim.set_ethtool_tc_offloads(True)
-    sim.set_xdp(obj, "offload")
+    map_obj = bpf_obj("sample_map_ret0.o")
+    start_test("Test loading program with maps...")
+    sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
 
     start_test("Test bpftool bound info reporting (own ns)...")
     check_dev_info(False, "")
@@ -757,11 +815,111 @@ try:
     sim.set_ns("")
     check_dev_info(False, "")
 
-    pin_file, _ = pin_prog("/sys/fs/bpf/tmp")
+    prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog")
+    map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2)
     sim.remove()
 
     start_test("Test bpftool bound info reporting (removed dev)...")
-    check_dev_info(True, "", pin_file=pin_file, removed=True)
+    check_dev_info_removed(prog_file=prog_file, map_file=map_file)
+
+    # Remove all pinned files and reinstantiate the netdev
+    clean_up()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+
+    start_test("Test map update (no flags)...")
+    sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+    maps = bpftool_map_list(expected=2)
+    array = maps[0] if maps[0]["type"] == "array" else maps[1]
+    htab = maps[0] if maps[0]["type"] == "hash" else maps[1]
+    for m in maps:
+        for i in range(2):
+            bpftool("map update id %d key %s value %s" %
+                    (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+    for m in maps:
+        ret, _ = bpftool("map update id %d key %s value %s" %
+                         (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+                         fail=False)
+        fail(ret == 0, "added too many entries")
+
+    start_test("Test map update (exists)...")
+    for m in maps:
+        for i in range(2):
+            bpftool("map update id %d key %s value %s exist" %
+                    (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+    for m in maps:
+        ret, err = bpftool("map update id %d key %s value %s exist" %
+                           (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+                           fail=False)
+        fail(ret == 0, "updated non-existing key")
+        fail(err["error"].find("No such file or directory") == -1,
+             "expected ENOENT, error is '%s'" % (err["error"]))
+
+    start_test("Test map update (noexist)...")
+    for m in maps:
+        for i in range(2):  # keys 0 and 1 were inserted above; check each one
+            ret, err = bpftool("map update id %d key %s value %s noexist" %
+                               (m["id"], int2str("I", i), int2str("Q", i * 3)),
+                               fail=False)
+            fail(ret == 0, "updated existing key")
+            fail(err["error"].find("File exists") == -1,
+                 "expected EEXIST, error is '%s'" % (err["error"]))
+
+    start_test("Test map dump...")
+    for m in maps:
+        _, entries = bpftool("map dump id %d" % (m["id"]))
+        for i in range(2):  # entry i must hold key i and value i * 3
+            key = str2int(entries[i]["key"])
+            fail(key != i, "expected key %d, got %d" % (i, key))
+            val = str2int(entries[i]["value"])
+            fail(val != i * 3, "expected value %d, got %d" % (i * 3, val))
+
+    start_test("Test map getnext...")
+    for m in maps:
+        _, entry = bpftool("map getnext id %d" % (m["id"]))
+        key = str2int(entry["next_key"])
+        fail(key != 0, "next key %d, expected %d" % (key, 0))
+        _, entry = bpftool("map getnext id %d key %s" %
+                           (m["id"], int2str("I", 0)))
+        key = str2int(entry["next_key"])
+        fail(key != 1, "next key %d, expected %d" % (key, 1))
+        ret, err = bpftool("map getnext id %d key %s" %
+                           (m["id"], int2str("I", 1)), fail=False)
+        fail(ret == 0, "got next key past the end of map")
+        fail(err["error"].find("No such file or directory") == -1,
+             "expected ENOENT, error is '%s'" % (err["error"]))
+
+    start_test("Test map delete (htab)...")
+    for i in range(2):
+        bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i)))
+
+    start_test("Test map delete (array)...")
+    for i in range(2):  # NOTE(review): deletes from htab["id"], not array["id"] - confirm
+        ret, err = bpftool("map delete id %d key %s" %
+                           (htab["id"], int2str("I", i)), fail=False)
+        fail(ret == 0, "removed entry from an array")
+        fail(err["error"].find("No such file or directory") == -1,
+             "expected ENOENT, error is '%s'" % (err["error"]))
+
+    start_test("Test map remove...")
+    sim.unset_xdp("offload")
+    bpftool_map_list_wait(expected=0)
+    sim.remove()
+
+    sim = NetdevSim()
+    sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+    sim.remove()
+    bpftool_map_list_wait(expected=0)
+
+    start_test("Test map creation fail path...")
+    sim = NetdevSim()
+    sim.dfs["bpf_map_accept"] = "N"
+    ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
+    fail(ret == 0,
+         "netdevsim didn't refuse to create a map with offload disabled")
 
     print("%s: OK" % (os.path.basename(__file__)))
 
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 38c0f850f315..fb82d29ee863 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -29,6 +29,7 @@
 #include <linux/filter.h>
 #include <linux/bpf_perf_event.h>
 #include <linux/bpf.h>
+#include <linux/if_ether.h>
 
 #include <bpf/bpf.h>
 
@@ -49,6 +50,8 @@
 #define MAX_INSNS	512
 #define MAX_FIXUPS	8
 #define MAX_NR_MAPS	4
+#define POINTER_VALUE	0xcafe4all
+#define TEST_DATA_LEN	64
 
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
 #define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
@@ -62,6 +65,7 @@ struct bpf_test {
 	int fixup_map_in_map[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
+	uint32_t retval;
 	enum {
 		UNDEF,
 		ACCEPT,
@@ -95,6 +99,94 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = -3,
+	},
+	{
+		"DIV32 by 0, zero check 1",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV32 by 0, zero check 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV64 by 0, zero check",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"MOD32 by 0, zero check 1",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"MOD32 by 0, zero check 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"MOD64 by 0, zero check",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"empty prog",
+		.insns = {
+		},
+		.errstr = "last insn is not an exit or jmp",
+		.result = REJECT,
+	},
+	{
+		"only exit insn",
+		.insns = {
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
 	},
 	{
 		"unreachable",
@@ -210,6 +302,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"test8 ld_imm64",
@@ -517,6 +610,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
+		.retval = POINTER_VALUE,
 	},
 	{
 		"check valid spill/fill, skb mark",
@@ -803,6 +897,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R1 pointer comparison",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = -ENOENT,
 	},
 	{
 		"jump test 4",
@@ -1823,6 +1918,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = 0xfaceb00c,
 	},
 	{
 		"PTR_TO_STACK store/load - bad alignment on off",
@@ -1881,6 +1977,7 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
 		.errstr_unpriv = "R0 leaks addr",
+		.retval = POINTER_VALUE,
 	},
 	{
 		"unpriv: add const to pointer",
@@ -2054,6 +2151,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 				     BPF_FUNC_get_hash_recalc),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
@@ -2841,6 +2939,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
 	},
 	{
 		"direct packet access: test12 (and, good access)",
@@ -2865,6 +2964,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
 	},
 	{
 		"direct packet access: test13 (branches, good access)",
@@ -2895,6 +2995,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
 	},
 	{
 		"direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)",
@@ -2918,6 +3019,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
 	},
 	{
 		"direct packet access: test15 (spill with xadd)",
@@ -3204,6 +3306,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
 	},
 	{
 		"direct packet access: test28 (marking on <=, bad access)",
@@ -5823,6 +5926,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 0 /* csum_diff of 64-byte packet */,
 	},
 	{
 		"helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
@@ -6191,6 +6295,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 42 /* ultimate return value */,
 	},
 	{
 		"ld_ind: check calling conv, r1",
@@ -6262,6 +6367,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"check bpf_perf_event_data->sample_period byte load permitted",
@@ -7249,6 +7355,7 @@ static struct bpf_test tests[] = {
 		},
 		.fixup_map1 = { 3 },
 		.result = ACCEPT,
+		.retval = POINTER_VALUE,
 		.result_unpriv = REJECT,
 		.errstr_unpriv = "R0 leaks addr as return value"
 	},
@@ -7269,6 +7376,7 @@ static struct bpf_test tests[] = {
 		},
 		.fixup_map1 = { 3 },
 		.result = ACCEPT,
+		.retval = POINTER_VALUE,
 		.result_unpriv = REJECT,
 		.errstr_unpriv = "R0 leaks addr as return value"
 	},
@@ -7710,6 +7818,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
@@ -8851,6 +8960,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"calls: overlapping caller/callee",
@@ -9046,6 +9156,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_ACT,
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN,
 	},
 	{
 		"calls: callee using args1",
@@ -9058,6 +9169,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "allowed for root only",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = POINTER_VALUE,
 	},
 	{
 		"calls: callee using wrong args2",
@@ -9088,6 +9200,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "allowed for root only",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
 	},
 	{
 		"calls: callee changing pkt pointers",
@@ -9136,6 +9249,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN + TEST_DATA_LEN,
 	},
 	{
 		"calls: calls with stack arith",
@@ -9154,6 +9268,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 42,
 	},
 	{
 		"calls: calls with misaligned stack access",
@@ -9187,6 +9302,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 43,
 	},
 	{
 		"calls: calls control flow, jump test 2",
@@ -9679,6 +9795,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
 		.result = ACCEPT,
+		.retval = 42,
 	},
 	{
 		"calls: write into callee stack frame",
@@ -10290,6 +10407,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = POINTER_VALUE,
 	},
 	{
 		"calls: pkt_ptr spill into caller stack 2",
@@ -10355,6 +10473,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"calls: pkt_ptr spill into caller stack 4",
@@ -10388,6 +10507,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"calls: pkt_ptr spill into caller stack 5",
@@ -10796,10 +10916,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	int fd_prog, expected_ret, reject_from_alignment;
 	struct bpf_insn *prog = test->insns;
 	int prog_len = probe_filter_length(prog);
+	char data_in[TEST_DATA_LEN] = {};
 	int prog_type = test->prog_type;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
-	int i;
+	uint32_t retval;
+	int i, err;
 
 	for (i = 0; i < MAX_NR_MAPS; i++)
 		map_fds[i] = -1;
@@ -10842,6 +10964,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		}
 	}
 
+	if (fd_prog >= 0) {
+		err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in),
+					NULL, NULL, &retval, NULL);
+		if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
+			printf("Unexpected bpf_prog_test_run error\n");
+			goto fail_log;
+		}
+		if (!err && retval != test->retval &&
+		    test->retval != POINTER_VALUE) {
+			printf("FAIL retval %d != %d\n", retval, test->retval);
+			goto fail_log;
+		}
+	}
 	(*passes)++;
 	printf("OK%s\n", reject_from_alignment ?
 	       " (NOTE: reject due to unknown alignment)" : "");